Import Library and Dataset

We worked on the Spotify Top 200 Charts (2020-2021) dataset. This dataset includes every song that appeared on Spotify's Top 200 Weekly (Global) charts in 2020 and 2021, and it uses a variety of variables to describe and rank each song. The data is accurate and trustworthy because it is pulled directly from Spotify.
library(ggplot2)
library(dplyr)
library(tidyverse)
library(hrbrthemes)
library(wordcloud2)
# Load the Spotify chart data from CSV into the data frame `spot.df`.
spot.df <- read.csv(file = "SpotifyDataset_2019-2021.csv")
# Preview only the first four rows as a quick check of the import.
head(x = spot.df, n = 4)
# Scatter plot of Tempo (y) against Danceability (x) for every row of
# spot.df, drawn as semi-transparent blue points with the hrbrthemes theme.
p <- ggplot(data = spot.df, mapping = aes(x = Danceability, y = Tempo)) +
  geom_point(color = "blue", alpha = 0.4) +
  theme_ipsum()
# Print the plot with its title; the stored `p` itself stays untitled.
p + labs(title = "Identifying the best tempo for Songs with high Danceability")
# Bar chart counting how many rows of spot.df fall under each Chord value.
#
# The y aesthetic is intentionally left unmapped: geom_bar() uses the count
# stat by default, which supplies the per-category count as the bar height.
# This replaces the deprecated `..count..` dot-dot notation without changing
# the rendered plot.
#
# NOTE(review): `color` only affects the bar outlines; if filled bars were
# intended, map `fill = Chord` instead -- confirm with the author.
p <- ggplot(spot.df, aes(x = factor(Chord), color = Chord)) +
  ggtitle("Top 200 Song on Spotify: Song Count of each Chord")
# All arguments previously spelled out here were geom_bar()'s defaults.
p + geom_bar()
# Box plot of Popularity for each Chord value, with the boxes outlined in a
# per-Chord colour. The aesthetics stay on the layer rather than the plot.
bp <- ggplot(data = spot.df) +
  geom_boxplot(mapping = aes(x = Chord, y = Popularity, color = Chord)) +
  labs(
    x = "Key (Chords)",
    y = "Popularity",
    title = "Does Chords (Key) \nAffect Popularity with statistical significance?"
  )
# Print the finished plot.
bp
# Scatter plot of Loudness (y) against Energy (x), coloured by Chord and
# sized by Popularity.
#
# `size = Popularity` must live inside aes() to be mapped to the data.
# Previously it was passed as a stray argument to ggplot(), where it fell
# into the unused `...` and was silently ignored.
p <- ggplot(
  spot.df,
  aes(x = Energy, y = Loudness, color = Chord, size = Popularity)
) +
  geom_point(alpha = 0.4)
# NOTE(review): the title is an empty string in the original; a descriptive
# title is probably wanted here -- confirm with the author.
p + ggtitle("")
# Scatter plot of Popularity (y) against HighestChartingPosition (x),
# coloured by Chord.
p <- ggplot(
  spot.df,
  aes(x = HighestChartingPosition, y = Popularity, color = Chord)
) +
  geom_point(alpha = 0.4) +
  # ylim() restricts the y scale to 45-100; rows whose Popularity falls
  # outside that range are dropped from the plot (ggplot2 warns about them).
  ylim(45, 100)
# Title typo fixed: "Idenfication" -> "Identification".
p + ggtitle("Popularity by Highest Charting Position (Ranking) \nWith Chord Identification")
# Scatter plot of Streams (y, log10 scale) against Popularity (x), coloured
# by Chord.
p1 <- ggplot(spot.df, aes(x = Popularity, y = Streams, color = Chord)) +
  geom_point(alpha = 0.4) +
  scale_y_log10()
# Title typo fixed: "Idenfication" -> "Identification".
p1 + ggtitle("Streams by Popularity \nWith Chord Identification")