Click the Original, Code and Reconstruction tabs to read about the issues and how they were fixed.
Objective
Explain the objective of the original data visualisation and the targeted audience.
The visualisation chosen had the following three main issues:
Reference
The following code was used to fix the issues identified in the original.
library(rvest)
library(dplyr)
library(gdata)
library(tidyverse)
library(ggplot2)
library(scales)
library(reprex)
library(ggh4x)
library(repr)
library(ggthemes)
library(ragg)
# Web scraping: download and parse the Wikipedia page, then print every
# <table> node it contains so the table holding the cost data can be located.
url <- "https://en.m.wikipedia.org/wiki/List_of_most_expensive_music_videos"
v <- url %>% read_html()
html_nodes(v, "table")
## {xml_nodeset (5)}
## [1] <table class="box-Multiple_issues plainlinks metadata ambox ambox-content ...
## [2] <table class="box-More_citations_needed plainlinks metadata ambox ambox-c ...
## [3] <table class="box-Unreliable_sources plainlinks metadata ambox ambox-cont ...
## [4] <table class="wikitable sortable plainrowheaders">\n<caption>Most expensi ...
## [5] <table class="wikitable plainrowheaders">\n<caption>Timeline of most expe ...
# Extract the cost table from the parsed page `v`.
# The table is selected by its CSS classes ("wikitable sortable") instead of
# by its position among all tables: the tables in front of it are maintenance
# banners (see the node listing printed above), and a banner being added or
# removed would silently shift a positional index onto the wrong table.
cost_tables <- html_nodes(v, "table.wikitable.sortable")
# Fail loudly if the page layout changed and no such table exists any more.
stopifnot(length(cost_tables) >= 1)
# header = FALSE reads every row as data (columns X1..X8); the first two rows
# are then dropped -- presumably the table's own header rows, since the
# preview below starts at rank 1.
video <- cost_tables[[1]] %>%
  html_table(header = FALSE, fill = TRUE) %>%
  slice(-(1:2))
# Preview the scraped rows.
as_tibble(video)
## # A tibble: 76 × 8
## X1 X2 X3 X4 X5 X6 X7 X8
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 "\"Scream\"" Michael… Mark… 1995 $7,0… $12,… [3][…
## 2 2 "\"Die Another Day\"" Madonna Trak… 2002 $6,1… $9,1… [3][…
## 3 3 "\"Express Yourself\"" Madonna Davi… 1989 $5,0… $10,… [3][…
## 4 3 "\"Bedtime Story\"" Madonna Mark… 1995 $5,0… $8,8… [3][…
## 5 5 "\"Estranged\"" Guns N'… Andy… 1993 $5,0… $9,3… [3][…
## 6 6 "\"Black or White\"" Michael… John… 1991 $4,0… $7,9… [3][…
## 7 6 "\"Make Me Like You\"" Gwen St… Soph… 2016 $4,0… $4,5… [5]
## 8 8 "\"Cartoon Heroes\"" Aqua Thom… 2000 $3,5… $5,5… [6]
## 9 9 "\"Rollin' (Air Raid Vehicle)\"" Limp Bi… Fred… 2000 $3,0… $4,6… [7]
## 10 10 "\"Victory\"" Puff Da… Marc… 1998 $2,7… $4,4… [8]
## # … with 66 more rows
# Give the eight scraped columns readable names.
col_names <- c(
  "Rank", "Title", "Artist(s)", "Director", "Year",
  "Cost Nominal (est.)", "Cost_Adjusted_(est.)", "Ref"
)
names(video) <- col_names
# Work with a plain data frame from here on.
video <- as.data.frame(video)
# Drop the columns that are not needed: Ref only holds citation markers and
# Rank contains repeated numbers for tied videos.
video <- video[, setdiff(names(video), c("Ref", "Rank")), drop = FALSE]
# Clean the individual columns in one pass.
video <- video %>%
  mutate(
    # Cost: strip the thousands separators and the dollar sign, then convert
    # the remaining digits to an integer.
    `Cost_Adjusted_(est.)` = as.integer(
      gsub("\\$", "", gsub(",", "", `Cost_Adjusted_(est.)`))
    ),
    # Title: remove the double quotes wrapped around each title.
    Title = gsub("\"", "", Title),
    # Artist(s): remove any parenthesised text, including the whitespace
    # in front of it.
    `Artist(s)` = gsub(r"{\s*\([^\)]+\)}", "", as.character(`Artist(s)`)),
    # Year: stored as text by the scrape; convert to integer.
    Year = as.integer(Year)
  )
# Add a running row index as the first column, then label each video with
# its decade in a new Year1 column.
video <- video %>%
  tibble::rowid_to_column("index") %>%
  mutate(
    # Branches are evaluated in order, so each upper bound implies the lower
    # bound of the branch before it. Years before 1990 all fall into "1980s"
    # and years from 2010 onwards all fall into "2010s".
    Year1 = case_when(
      Year < 1990 ~ "1980s",
      Year < 2000 ~ "1990s",
      Year < 2010 ~ "2000s",
      Year >= 2010 ~ "2010s"
    )
  )
# Add a music Genre column.
# Genres were assigned by hand and are keyed on the `index` column, so the
# numbers below are only valid for the table exactly as it was scraped
# (76 rows); re-check them if the source table changes.
genre_index <- list(
  "Hip-hop"   = c(6, 10, 11, 12, 14, 20, 27, 29, 30, 32, 46),
  "R&B"       = c(1, 18, 39, 40, 43, 47, 76),
  "Dance-pop" = c(2, 3, 28, 44, 45, 62, 64, 74, 75),
  "K-pop"     = c(42, 50, 53, 55, 65),
  "Pop"       = c(7, 49, 52, 70, 73),
  "Pop-rock"  = c(16, 17, 25, 38, 41, 61, 63),
  "J-pop"     = c(22, 23, 24, 33, 48),
  "Other"     = c(4, 5, 8, 9, 13, 15, 19, 21, 26, 31, 34, 35, 36, 37, 51, 54,
                  56, 57, 58, 59, 60, 66, 67, 68, 69, 71, 72)
)
# Flatten the list into two parallel vectors: row index -> genre label.
genre_rows <- unlist(genre_index, use.names = FALSE)
genre_labels <- rep(names(genre_index), lengths(genre_index))
# An index listed under two genres is ambiguous (pasting per-genre columns
# together would have fused the two labels into one string), so refuse it.
stopifnot(anyDuplicated(genre_rows) == 0)
# A single lookup replaces the eight helper columns, the paste() and the
# space-stripping step, so genre names may now safely contain spaces.
video$Genre <- genre_labels[match(video$index, genre_rows)]
# Rows without a genre keep the empty string, as before, but are reported.
unmatched <- is.na(video$Genre)
if (any(unmatched)) {
  warning(
    "No genre assigned for index: ",
    paste(video$index[unmatched], collapse = ", "),
    call. = FALSE
  )
  video$Genre[unmatched] <- ""
}
# Fill colours for the genres, as a character vector (the form
# scale_fill_manual() documents for `values`). The vector is unnamed, so the
# colours are matched in order with the limits of the fill scale.
colors <- c(
  "#fbb4ae", "#b3cde3", "#ccebc5", "#decbe4",
  "#fed9a6", "#ffffcc", "#e5d8bd", "#fddaec"
)
# The code used to visualize the data:
# horizontal bars (one per video, ordered by inflation-adjusted cost),
# coloured by genre and split into one panel per decade.
expensive_videos <- ggplot(
  video,
  aes(
    x = reorder(Title, `Cost_Adjusted_(est.)`),
    y = `Cost_Adjusted_(est.)`,
    fill = Genre
  )
) +
  geom_col(width = 0.8) +
  # Artist names are drawn inside the bars, starting at the bar base.
  geom_text(
    aes(y = 1, label = `Artist(s)`),
    hjust = 0, color = "black", family = "mono", size = 8
  ) +
  scale_fill_manual(values = colors) +
  # One panel per decade, stacked vertically; each panel only shows its own
  # video titles.
  facet_wrap(
    ~Year1,
    dir = "v", scales = "free_y", ncol = 1, strip.position = "right"
  ) +
  # Panel heights for the four decade panels (ggh4x).
  force_panelsizes(rows = c(2, 8, 6.2, 6), cols = NULL) +
  # Show costs in millions.
  scale_y_continuous(
    labels = unit_format(unit = "M", scale = 1e-6),
    position = "right"
  ) +
  labs(
    title = "The Most Expensive Music Videos Of All Time",
    subtitle = "The Artist(s) Names Are Written Inside The Bars",
    x = "Video Title",
    y = "Cost in Millions ($USD)",
    fill = "Music Genre"
  ) +
  theme(
    strip.background = element_rect(colour = "black", fill = "black"),
    strip.placement = "outside",
    strip.text = element_text(face = "bold", size = 22, color = "white"),
    panel.spacing = unit(0.5, "lines"),
    plot.title = element_text(family = "Helvetica", face = "bold", size = 30, color = "black"),
    plot.subtitle = element_text(family = "mono", face = "bold", size = 18, color = "black"),
    legend.title = element_text(family = "Helvetica", face = "bold", size = 22, colour = "black"),
    legend.text = element_text(family = "Helvetica", face = "bold", size = 20, colour = "black"),
    axis.title = element_text(family = "Helvetica", face = "bold", size = 22, colour = "black"),
    axis.text = element_text(family = "mono", face = "bold", size = 20, color = "black"),
    legend.position = "bottom"
  ) +
  # Flip the axes so the bars run horizontally and the titles stay readable.
  coord_flip()
Data Reference * Wikipedia (April 2022), List of most expensive music videos, website: https://en.wikipedia.org/wiki/List_of_most_expensive_music_videos
The following plot fixes the main issues in the original.