library(rvest)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.4 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ readr::guess_encoding() masks rvest::guess_encoding()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Scrape Calum Scott's per-song Spotify stream counts from kworb.net and
# assemble them into a data frame with numeric Streams and Daily columns.
website <- "https://kworb.net/spotify/artist/6ydoSd3N2mwgwBHtF6K7eX_songs.html"
spotify <- read_html(website)

# Return the text of every element on `page` matched by the CSS selector `css`.
# html_elements() is the current name for the superseded html_nodes().
scrape_text <- function(page, css) {
  page %>%
    html_elements(css) %>%
    html_text2()
}

# Strip thousands separators (commas) from count strings and convert them to
# numbers.
parse_count <- function(x) {
  as.numeric(gsub(",", "", x, fixed = TRUE))
}

Song <- scrape_text(spotify, ".text a")
Streams <- scrape_text(spotify, ".sortable .text+ td")
Daily <- scrape_text(spotify, ".sortable .text~ td+ td")

# data.frame() silently recycles shorter columns when lengths are multiples of
# each other, so make sure the three selectors returned one value per song.
stopifnot(
  length(Song) == length(Streams),
  length(Song) == length(Daily)
)

streamsdf <- data.frame(Song, Streams, Daily)
streamsdf$Streams <- parse_count(streamsdf$Streams)
streamsdf$Daily <- parse_count(streamsdf$Daily)
head(streamsdf)
## Song Streams Daily
## 1 Dancing On My Own 1202712025 504709
## 2 Where Are You Now 1155735957 763384
## 3 You Are The Reason 1014780683 440705
## 4 You Are The Reason - Duet Version 249602215 82868
## 5 Dancing On My Own - Tiësto Remix 150651519 66317
## 6 Whistle (feat. Calum Scott) 137917062 198952
I am using data from Spotify to compare the total number of streams of each song by the artist Calum Scott. I am using this data set because the song “Dancing On My Own” became extremely popular in Philadelphia during the 2022 Baseball World Series. I am analyzing how this song promoted the Philadelphia Phillies’ social media page and engaged fans. I want to highlight that “Dancing On My Own” is Calum Scott’s most streamed song on Spotify.
# First attempt: one bar per song for the full scraped data set, with bar
# height equal to total streams. geom_col() is ggplot2's shorthand for
# geom_bar(stat = "identity").
plot1 <- ggplot(data = streamsdf, mapping = aes(x = Song, y = Streams)) +
  geom_col()
plot1
I started with a bar graph that included all songs from the data set. The graph is incomplete and the axis labels overlap. There is too much unnecessary information being displayed - therefore I made the decision to filter the data set.
# Keep the eight most-streamed songs. Sorting by Streams explicitly means the
# "top 8" no longer depends on the order in which the rows were scraped.
filterstreams <- streamsdf %>%
  arrange(desc(Streams)) %>%
  head(8)
filterstreams
## Song Streams Daily
## 1 Dancing On My Own 1202712025 504709
## 2 Where Are You Now 1155735957 763384
## 3 You Are The Reason 1014780683 440705
## 4 You Are The Reason - Duet Version 249602215 82868
## 5 Dancing On My Own - Tiësto Remix 150651519 66317
## 6 Whistle (feat. Calum Scott) 137917062 198952
## 7 No Matter What 137102527 28457
## 8 Woke Up in Love 118822235 109367
# Bar chart of the top-8 songs with a title, axis labels, and x-axis tick
# labels rotated 45 degrees so the song titles do not overlap.
plot2 <- ggplot(data = filterstreams, mapping = aes(x = Song, y = Streams)) +
  geom_col() +
  labs(
    title = "Calum Scott - Total Song Streams",
    x = "Song Title",
    y = "# of Streams"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot2
Next, I filtered the data set and chose to only include Calum Scott’s Top 8 songs that were streamed on Spotify. I added titles, axis labels, and angled the x-axis labels to ensure that there would be no overlap between strings. This graph shows which songs are most popular and shows the information in an organized manner. However, I wanted to add dimensions that would enhance the look of the graph.
library(png)
library(ggpubr)
# website that showed me how to do this - https://www.edureka.co/community/58999/add-image-background-to-ggplot
# Build the image path explicitly instead of calling setwd(), so this chunk
# does not change the working directory for the rest of the session.
img_dir <- "/Users/abbyjansen/Desktop/STAT 3280"
img <- png::readPNG(
  file.path(img_dir, "srch_universalmusic_00602567140481-UK6KW1500205.png")
)
# Top-8 bar chart drawn over the PNG image. ggplot2 draws layers in the order
# they are added, so the background image comes before the bars.
# No `label` aesthetic is mapped on the bars: geom_bar() does not use one and
# only emitted an "Ignoring unknown aesthetics: label" warning.
plot3 <- ggplot(filterstreams, aes(x = Song, y = Streams)) +
  background_image(img) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(
    title = "Calum Scott - Total Song Streams",
    x = "Song Title",
    y = "# of Streams"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
plot3
I thought that it would be interesting to change the background image of the bar graph to the album cover of “Dancing On My Own”. This personalizes and adds a fun layer to the plot. I also made sure to change the bar graph colors because they would have blended into the background if they were not changed. I changed the color to sky blue, similar to the powder blue of the Phillies’ old-school retro jerseys.
# Top-8 bar chart of total streams (no background image), with each song's
# daily stream count printed above its bar.
# No `label` aesthetic is mapped on the bars: geom_bar() does not use one and
# only emitted an "Ignoring unknown aesthetics: label" warning.
plot4 <- ggplot(filterstreams, aes(x = Song, y = Streams)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(
    title = "Calum Scott - Total and Daily Song Streams",
    x = "Song Title",
    y = "# of Streams"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# website that showed me how to put labels on top of bars - https://www.geeksforgeeks.org/how-to-add-labels-over-each-bar-in-barplot-in-r/
# The label text is the Daily count. Its position is the top of the bar
# (y = Streams, inherited from the plot) and vjust lifts it just above the bar,
# so there is no need to add Daily to the y position.
plot4 <- plot4 +
  geom_text(aes(label = Daily), vjust = -0.5)
plot4
I began to think that the background image was too much. I removed it and instead added text to the top of each bar that shares the number of daily streams of each song. I thought that this layer could be interesting because it shows which songs are the most popular at the current time. For instance, even though “Dancing On My Own” has the most total streams, “Where Are You Now” has the most daily streams, which can indicate that the song is more popular currently.
library(treemap)
# Treemap of the top-8 songs: one rectangle per song, sized by total streams.
# treemap() returns a list describing the layout; printing plot5 shows that
# list.
plot5 <- treemap(
  filterstreams,
  index = "Song",
  vSize = "Streams",
  type = "index"
)
plot5
## $tm
## Song vSize vColor stdErr vColorValue
## 1 Dancing On My Own - Tiësto Remix 150651519 1 150651519 NA
## 2 Dancing On My Own 1202712025 1 1202712025 NA
## 3 No Matter What 137102527 1 137102527 NA
## 4 Where Are You Now 1155735957 1 1155735957 NA
## 5 Whistle (feat. Calum Scott) 137917062 1 137917062 NA
## 6 Woke Up in Love 118822235 1 118822235 NA
## 7 You Are The Reason - Duet Version 249602215 1 249602215 NA
## 8 You Are The Reason 1014780683 1 1014780683 NA
## level x0 y0 w h color
## 1 1 0.5659382 0.0000000 0.2187833 0.1652350 #D3A362
## 2 1 0.0000000 0.4900409 0.5659382 0.5099591 #00C1BA
## 3 1 0.8926796 0.1324463 0.1073204 0.3065531 #E68ECF
## 4 1 0.0000000 0.0000000 0.5659382 0.4900409 #A1B453
## 5 1 0.7847215 0.1324463 0.1079580 0.3065531 #5BB5E2
## 6 1 0.7847215 0.0000000 0.2152785 0.1324463 #EC929B
## 7 1 0.5659382 0.1652350 0.2187833 0.2737644 #53BF82
## 8 1 0.5659382 0.4389994 0.4340618 0.5610006 #B79FEB
##
## $type
## [1] "index"
##
## $vSize
## [1] "Streams"
##
## $vColor
## [1] NA
##
## $stdErr
## [1] "Streams"
##
## $algorithm
## [1] "pivotSize"
##
## $vpCoorX
## [1] 0.02812148 0.97187852
##
## $vpCoorY
## [1] 0.01968504 0.91031496
##
## $aspRatio
## [1] 1.483512
##
## $range
## [1] NA
##
## $mapping
## [1] NA NA NA
##
## $draw
## [1] TRUE
After messing around with the bar graphs, I began to think that there were better ways to display this information. I wanted to take the songs and display them proportionately using a tree map. This visualization allows the reader to understand which songs have the most streams based on the area of the rectangle.
# Chat GPT showed me how to build a color palette from three anchor colors
# for a data set of 8 values - https://chat.openai.com/
# Anchor colors for the Phillies theme.
phillies_palette <- c(
  "#E81828", # red
  "#002D72", # blue
  "#FFFFFF"  # white
)
# colorRampPalette() returns a function that interpolates between the anchors.
my_palette <- colorRampPalette(colors = phillies_palette)
# Request 9 steps for 8 songs so the last step (pure white) goes unused,
# for visual purposes.
my_colors <- my_palette(9)
# Same treemap as before, recolored with the custom Phillies palette and
# labelled in white.
plot6 <- treemap(
  filterstreams,
  index = "Song",
  vSize = "Streams",
  type = "index",
  palette = my_colors,
  fontcolor.labels = "white",
  fontface.labels = 3 # font face 3 is italic in R graphics
)
plot6
## $tm
## Song vSize vColor stdErr vColorValue
## 1 Dancing On My Own - Tiësto Remix 150651519 1 150651519 NA
## 2 Dancing On My Own 1202712025 1 1202712025 NA
## 3 No Matter What 137102527 1 137102527 NA
## 4 Where Are You Now 1155735957 1 1155735957 NA
## 5 Whistle (feat. Calum Scott) 137917062 1 137917062 NA
## 6 Woke Up in Love 118822235 1 118822235 NA
## 7 You Are The Reason - Duet Version 249602215 1 249602215 NA
## 8 You Are The Reason 1014780683 1 1014780683 NA
## level x0 y0 w h color
## 1 1 0.5659382 0.0000000 0.2187833 0.1652350 #AE1D3A
## 2 1 0.0000000 0.4900409 0.5659382 0.5099591 #E81828
## 3 1 0.8926796 0.1324463 0.1073204 0.3065531 #74224D
## 4 1 0.0000000 0.0000000 0.5659382 0.4900409 #39275F
## 5 1 0.7847215 0.1324463 0.1079580 0.3065531 #002D72
## 6 1 0.7847215 0.0000000 0.2152785 0.1324463 #3F6195
## 7 1 0.5659382 0.1652350 0.2187833 0.2737644 #BFCADB
## 8 1 0.5659382 0.4389994 0.4340618 0.5610006 #7F96B8
##
## $type
## [1] "index"
##
## $vSize
## [1] "Streams"
##
## $vColor
## [1] NA
##
## $stdErr
## [1] "Streams"
##
## $algorithm
## [1] "pivotSize"
##
## $vpCoorX
## [1] 0.02812148 0.97187852
##
## $vpCoorY
## [1] 0.01968504 0.91031496
##
## $aspRatio
## [1] 1.483512
##
## $range
## [1] NA
##
## $mapping
## [1] NA NA NA
##
## $draw
## [1] TRUE
I then changed the color palette of the Tree Map to make it fit with my project. I made my own palette and included red, blue and white to match the colors of the Phillies. I also changed the color of the text to white to make the information pop.
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Rank songs by total streams (1 = most streamed). Deriving the rank from the
# Streams column, rather than from row position, keeps the ranking and the
# colors correct even if the rows are not already sorted.
filterstreams$Ranking <- rank(-filterstreams$Streams, ties.method = "first")
# Chat GPT showed me how to edit the hover text that you want to show
# https://chat.openai.com/
hover_text <- paste("Ranking:", filterstreams$Ranking)
# Interactive treemap: one sector per song, sized by total streams.
# Colors are picked by rank, so each song gets exactly one palette entry
# (my_colors has 9 entries for 8 songs) and the most-streamed song gets the
# first color in the palette.
plot7 <- plot_ly(
  filterstreams,
  labels = ~Song,
  parents = ~"",
  values = ~Streams,
  marker = list(colors = my_colors[filterstreams$Ranking]),
  type = "treemap",
  text = hover_text,
  textinfo = "label", # show only the song title inside each box
  textfont = list(color = "white")
)
plot7 <- plot7 %>%
  layout(
    title = "Top Calum Scott Songs by Total Streams (Spotify)",
    margin = list(t = 60) # moves the title down
  )
plot7
Finally, I wanted to make this tree map more engaging and interactive for my user. Therefore, I made the tree map using Plotly where I was able to add more layers. First, the colors of the tree map are now based on the number of streams, rather than being random. My color palette goes from red, to blue, to white. Therefore, the songs with the highest streams are close to red, while the songs with the fewest streams are close to blue/white. This change makes the visualization more logical. Next, I added hover text to ensure that the values of the total streams are present. The hover text shows the song title, total streams, and its ranking. Since some boxes appear to be similar in area, I added rankings to show the viewer where each song sits in Calum Scott’s top 8 songs. I only wanted to include the name of the song in the boxes to make the tree map visually appealing, rather than all the information that is included in the hover text. I did this by indicating that the text information is just the label. Finally, I added a title that captures what the reader is looking at. This is my best plot and I will use this in my final project.
Works Cited
https://www.edureka.co/community/58999/add-image-background-to-ggplot
https://www.geeksforgeeks.org/how-to-add-labels-over-each-bar-in-barplot-in-r/