Este conjunto de datos presenta una recopilación completa de las canciones más reproducidas en Spotify en 2024. Proporciona información detallada sobre los atributos, la popularidad y la presencia de cada pista en varias plataformas de música, lo que ofrece un recurso valioso para analistas musicales, entusiastas y profesionales de la industria. El conjunto de datos incluye información como nombre de la pista, artista, fecha de lanzamiento, ISRC, estadísticas de transmisión y presencia en plataformas como YouTube, TikTok y otras.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(dslabs)
library(viridisLite)
library(RColorBrewer)
library(ggplot2)
cancionesBD <- read.csv("Most_Streamed_Spotify_Songs_2024.csv", sep = ",", header = TRUE, dec = ",")
head(cancionesBD)
## Track Album.Name Artist
## 1 MILLION DOLLAR BABY Million Dollar Baby - Single Tommy Richman
## 2 Not Like Us Not Like Us Kendrick Lamar
## 3 i like the way you kiss me I like the way you kiss me Artemas
## 4 Flowers Flowers - Single Miley Cyrus
## 5 Houdini Houdini Eminem
## 6 Lovin On Me Lovin On Me Jack Harlow
## Release.Date ISRC All.Time.Rank Track.Score Spotify.Streams
## 1 4/26/2024 QM24S2402528 1 725.4 390,470,936
## 2 5/4/2024 USUG12400910 2 545.9 323,703,884
## 3 3/19/2024 QZJ842400387 3 538.4 601,309,283
## 4 1/12/2023 USSM12209777 4 444.9 2,031,280,633
## 5 5/31/2024 USUG12403398 5 423.3 107,034,922
## 6 11/10/2023 USAT22311371 6 410.1 670,665,438
## Spotify.Playlist.Count Spotify.Playlist.Reach Spotify.Popularity
## 1 30.716 196,631,588 92
## 2 28.113 174,597,137 92
## 3 54.331 211,607,669 92
## 4 269.802 136,569,078 85
## 5 7.223 151,469,874 88
## 6 105.892 175,421,034 83
## YouTube.Views YouTube.Likes TikTok.Posts TikTok.Likes TikTok.Views
## 1 84,274,754 1,713,126 5,767,700 651,565,900 5,332,281,936
## 2 116,347,040 3,486,739 674,700 35,223,547 208,339,025
## 3 122,599,116 2,228,730 3,025,400 275,154,237 3,369,120,610
## 4 1,096,100,899 10,629,796 7,189,811 1,078,757,968 14,603,725,994
## 5 77,373,957 3,670,188 16,400
## 6 131,148,091 1,392,593 4,202,367 214,943,489 2,938,686,633
## YouTube.Playlist.Reach Apple.Music.Playlist.Count AirPlay.Spins
## 1 150,597,040 210 40,975
## 2 156,380,351 188 40,778
## 3 373,784,955 190 74,333
## 4 3,351,188,582 394 1,474,799
## 5 112,763,851 182 12,185
## 6 2,867,222,632 138 522,042
## SiriusXM.Spins Deezer.Playlist.Count Deezer.Playlist.Reach
## 1 684.000 62 17,598,718
## 2 3.000 67 10,422,430
## 3 536.000 136 36,321,847
## 4 2.182 264 24,684,248
## 5 1.000 82 17,660,624
## 6 4.654 86 17,167,254
## Amazon.Playlist.Count Pandora.Streams Pandora.Track.Stations
## 1 114 18,004,655 22,931
## 2 111 7,780,028 28,444
## 3 172 5,022,621 5,639
## 4 210 190,260,277 203,384
## 5 105 4,493,884 7,006
## 6 152 138,529,362 50,982
## Soundcloud.Streams Shazam.Counts TIDAL.Popularity Explicit.Track
## 1 4,818,457 2,669,262 NA 0
## 2 6,623,075 1,118,279 NA 1
## 3 7,208,651 5,285,340 NA 0
## 4 11,822,942 NA 0
## 5 207,179 457,017 NA 1
## 6 9,438,601 4,517,131 NA 1
Variables Cualitativas (Categorical Variables) Las variables cualitativas son aquellas que describen categorías o cualidades y no tienen un valor numérico intrínseco:
Album Name, Artist, Release Date, ISRC, Explicit Track
Variables Cuantitativas (Numerical Variables) Las variables cuantitativas son aquellas que pueden ser medidas numéricamente. Pueden ser continuas o discretas:
All Time Rank, Track Score, Spotify Streams, Spotify Playlist Count, Spotify Playlist Reach, Spotify Popularity, YouTube Views, YouTube Likes, TikTok Posts, TikTok Likes, TikTok Views, YouTube Playlist Reach, Apple Music Playlist Count, AirPlay Spins, SiriusXM Spins, Deezer Playlist Count, Deezer Playlist Reach, Amazon Playlist Count, Pandora Streams, Pandora Track Stations, Soundcloud Streams, Shazam Counts, TIDAL Popularity
# se crea temporalemnte la conversion a Factor de la columna Explicit.Track de
niveles_var <- levels(factor(cancionesBD$Explicit.Track))
print(niveles_var)
## [1] "0" "1"
#levels(cancionesBD$Album.Name)
#Crear tabla de la columna Explicit.Track para identificar la cantidad de canciones que si (1) y que no (0) tiene contenido explicito
cancionesBD_ce <- cancionesBD %>%
mutate(Explicit.Track = ifelse(Explicit.Track == 1, "SI", "NO"))
head(cancionesBD_ce)
## Track Album.Name Artist
## 1 MILLION DOLLAR BABY Million Dollar Baby - Single Tommy Richman
## 2 Not Like Us Not Like Us Kendrick Lamar
## 3 i like the way you kiss me I like the way you kiss me Artemas
## 4 Flowers Flowers - Single Miley Cyrus
## 5 Houdini Houdini Eminem
## 6 Lovin On Me Lovin On Me Jack Harlow
## Release.Date ISRC All.Time.Rank Track.Score Spotify.Streams
## 1 4/26/2024 QM24S2402528 1 725.4 390,470,936
## 2 5/4/2024 USUG12400910 2 545.9 323,703,884
## 3 3/19/2024 QZJ842400387 3 538.4 601,309,283
## 4 1/12/2023 USSM12209777 4 444.9 2,031,280,633
## 5 5/31/2024 USUG12403398 5 423.3 107,034,922
## 6 11/10/2023 USAT22311371 6 410.1 670,665,438
## Spotify.Playlist.Count Spotify.Playlist.Reach Spotify.Popularity
## 1 30.716 196,631,588 92
## 2 28.113 174,597,137 92
## 3 54.331 211,607,669 92
## 4 269.802 136,569,078 85
## 5 7.223 151,469,874 88
## 6 105.892 175,421,034 83
## YouTube.Views YouTube.Likes TikTok.Posts TikTok.Likes TikTok.Views
## 1 84,274,754 1,713,126 5,767,700 651,565,900 5,332,281,936
## 2 116,347,040 3,486,739 674,700 35,223,547 208,339,025
## 3 122,599,116 2,228,730 3,025,400 275,154,237 3,369,120,610
## 4 1,096,100,899 10,629,796 7,189,811 1,078,757,968 14,603,725,994
## 5 77,373,957 3,670,188 16,400
## 6 131,148,091 1,392,593 4,202,367 214,943,489 2,938,686,633
## YouTube.Playlist.Reach Apple.Music.Playlist.Count AirPlay.Spins
## 1 150,597,040 210 40,975
## 2 156,380,351 188 40,778
## 3 373,784,955 190 74,333
## 4 3,351,188,582 394 1,474,799
## 5 112,763,851 182 12,185
## 6 2,867,222,632 138 522,042
## SiriusXM.Spins Deezer.Playlist.Count Deezer.Playlist.Reach
## 1 684.000 62 17,598,718
## 2 3.000 67 10,422,430
## 3 536.000 136 36,321,847
## 4 2.182 264 24,684,248
## 5 1.000 82 17,660,624
## 6 4.654 86 17,167,254
## Amazon.Playlist.Count Pandora.Streams Pandora.Track.Stations
## 1 114 18,004,655 22,931
## 2 111 7,780,028 28,444
## 3 172 5,022,621 5,639
## 4 210 190,260,277 203,384
## 5 105 4,493,884 7,006
## 6 152 138,529,362 50,982
## Soundcloud.Streams Shazam.Counts TIDAL.Popularity Explicit.Track
## 1 4,818,457 2,669,262 NA NO
## 2 6,623,075 1,118,279 NA SI
## 3 7,208,651 5,285,340 NA NO
## 4 11,822,942 NA NO
## 5 207,179 457,017 NA SI
## 6 9,438,601 4,517,131 NA SI
tabla_expli <- table(cancionesBD_ce$Explicit.Track)
#Crear grafica de barras para la tabla de contenido explicito
colores <- c("blue", "green")
bp_ce <- barplot(tabla_expli,
main="Contenido explicito en canciones",
ylab="Frecuencia",
xlab="Contenido Explicito",
ylim = c(0, max(tabla_expli) * 1.2),
col = colores)
text(bp_ce, tabla_expli, labels = tabla_expli, pos = 3, cex = 0.8, col = "black")
grouped_ec <- cancionesBD_ce %>% group_by(Explicit.Track) %>%
summarise(cantidad = n())
totalce <- sum(grouped_ec$cantidad)
aux1 <- (grouped_ec$cantidad/totalce) * 100
aux1
## [1] 64.12712 35.87288
#pie(tabla_expli, labels = row.names(tabla_expli),col = rocket(4), main = "Diagrama circular", border = rocket(4))
pie(tabla_expli, labels = paste0(grouped_ec$Explicit.Track, "\n", round(aux1,1), "%"),col = rocket(4), main = "Diagrama circular", border = rocket(4))
legend("bottomright",legend = names(tabla_expli),cex = 0.75,fill=rocket(4))
str(cancionesBD)
## 'data.frame': 4594 obs. of 29 variables:
## $ Track : chr "MILLION DOLLAR BABY" "Not Like Us" "i like the way you kiss me" "Flowers" ...
## $ Album.Name : chr "Million Dollar Baby - Single" "Not Like Us" "I like the way you kiss me" "Flowers - Single" ...
## $ Artist : chr "Tommy Richman" "Kendrick Lamar" "Artemas" "Miley Cyrus" ...
## $ Release.Date : chr "4/26/2024" "5/4/2024" "3/19/2024" "1/12/2023" ...
## $ ISRC : chr "QM24S2402528" "USUG12400910" "QZJ842400387" "USSM12209777" ...
## $ All.Time.Rank : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Track.Score : chr "725.4" "545.9" "538.4" "444.9" ...
## $ Spotify.Streams : chr "390,470,936" "323,703,884" "601,309,283" "2,031,280,633" ...
## $ Spotify.Playlist.Count : num 30.72 28.11 54.33 269.8 7.22 ...
## $ Spotify.Playlist.Reach : chr "196,631,588" "174,597,137" "211,607,669" "136,569,078" ...
## $ Spotify.Popularity : int 92 92 92 85 88 83 86 92 NA 86 ...
## $ YouTube.Views : chr "84,274,754" "116,347,040" "122,599,116" "1,096,100,899" ...
## $ YouTube.Likes : chr "1,713,126" "3,486,739" "2,228,730" "10,629,796" ...
## $ TikTok.Posts : chr "5,767,700" "674,700" "3,025,400" "7,189,811" ...
## $ TikTok.Likes : chr "651,565,900" "35,223,547" "275,154,237" "1,078,757,968" ...
## $ TikTok.Views : chr "5,332,281,936" "208,339,025" "3,369,120,610" "14,603,725,994" ...
## $ YouTube.Playlist.Reach : chr "150,597,040" "156,380,351" "373,784,955" "3,351,188,582" ...
## $ Apple.Music.Playlist.Count: int 210 188 190 394 182 138 280 160 NA 191 ...
## $ AirPlay.Spins : chr "40,975" "40,778" "74,333" "1,474,799" ...
## $ SiriusXM.Spins : num 684 3 536 2.18 1 ...
## $ Deezer.Playlist.Count : int 62 67 136 264 82 86 168 87 NA 78 ...
## $ Deezer.Playlist.Reach : chr "17,598,718" "10,422,430" "36,321,847" "24,684,248" ...
## $ Amazon.Playlist.Count : int 114 111 172 210 105 152 154 53 NA 92 ...
## $ Pandora.Streams : chr "18,004,655" "7,780,028" "5,022,621" "190,260,277" ...
## $ Pandora.Track.Stations : chr "22,931" "28,444" "5,639" "203,384" ...
## $ Soundcloud.Streams : chr "4,818,457" "6,623,075" "7,208,651" "" ...
## $ Shazam.Counts : chr "2,669,262" "1,118,279" "5,285,340" "11,822,942" ...
## $ TIDAL.Popularity : logi NA NA NA NA NA NA ...
## $ Explicit.Track : int 0 1 0 0 1 1 0 1 1 1 ...
top_art_spotify <- dplyr::select(cancionesBD, Artist, Spotify.Streams) %>%
top_n(n =10, wt = Spotify.Streams) %>%
arrange(desc(Spotify.Streams))
top_art_spotify$Spotify.Streams <- as.numeric(gsub(",", "", top_art_spotify$Spotify.Streams))
top_art_spotify$valoresM <- paste0(format(top_art_spotify$Spotify.Streams / 1e6,digits = 3, nsmall = 2), "M")
top_art_spotify
## Artist Spotify.Streams valoresM
## 1 American Authors 999525686 999.53M
## 2 Mitski 997702304 997.70M
## 3 Olivia Rodrigo 997536616 997.54M
## 4 Gucci Mane 996942947 996.94M
## 5 The Weeknd 995463505 995.46M
## 6 MEDUZA 995452298 995.45M
## 7 Bad Bunny 991885224 991.89M
## 8 Calvin Harris 991382346 991.38M
## 9 R. City 990518773 990.52M
## 10 Lizzo 990118320 990.12M
ggplot(top_art_spotify, aes(x = reorder(Artist, Spotify.Streams), y = valoresM)) +
geom_bar(stat = "identity", fill = "#4682B4") +
geom_text(aes(label = valoresM), vjust = -0.5, size = 3) + # Agregar texto a las barras
coord_flip() +
labs(title = "Top 10 Artistas Mas Escuchados en Spotify",
x = "Artista",
y = "Numero de Reproducciones") +
#scale_y_continuous(labels = scales::comma) +
theme_minimal()
#str(cancionesBD)
top_art_youtube <- dplyr::select(cancionesBD, Artist, YouTube.Views) %>%
top_n(n =10, wt = YouTube.Views) %>%
arrange(desc(YouTube.Views))
top_art_youtube$YouTube.Views <- as.numeric(gsub(",", "", top_art_youtube$YouTube.Views))
top_art_youtube$valoresMY <- paste0(format(top_art_youtube$YouTube.Views / 1e6,digits = 3, nsmall = 2), "M")
top_art_spotify
## Artist Spotify.Streams valoresM
## 1 American Authors 999525686 999.53M
## 2 Mitski 997702304 997.70M
## 3 Olivia Rodrigo 997536616 997.54M
## 4 Gucci Mane 996942947 996.94M
## 5 The Weeknd 995463505 995.46M
## 6 MEDUZA 995452298 995.45M
## 7 Bad Bunny 991885224 991.89M
## 8 Calvin Harris 991382346 991.38M
## 9 R. City 990518773 990.52M
## 10 Lizzo 990118320 990.12M
ggplot(top_art_youtube, aes(x = reorder(Artist, YouTube.Views), y = valoresMY)) +
geom_bar(stat = "identity", fill = "#4682B4") +
geom_text(aes(label = valoresMY), vjust = -0.5, size = 3) + # Agregar texto a las barras
coord_flip() +
labs(title = "Top 10 Artistas Mas Escuchados en YouTube",
x = "Artista",
y = "Numero de Reproducciones") +
#scale_y_continuous(labels = scales::comma) +
theme_minimal()
#str(cancionesBD)
top_art_tiktok <- dplyr::select(cancionesBD, Artist, TikTok.Views) %>%
top_n(n =10, wt = TikTok.Views) %>%
arrange(desc(TikTok.Views))
top_art_tiktok$TikTok.Views <- as.numeric(gsub(",", "", top_art_tiktok$TikTok.Views))
top_art_tiktok$valoresMT <- paste0(format(top_art_tiktok$TikTok.Views / 1e6,digits = 3, nsmall = 2), "M")
ggplot(top_art_tiktok, aes(x = reorder(Artist, TikTok.Views), y = valoresMT)) +
geom_bar(stat = "identity", fill = "#4682B4") +
geom_text(aes(label = valoresMT), vjust = -0.5, size = 3) + # Agregar texto a las barras
coord_flip() +
labs(title = "Top 10 Artistas Mas Escuchados en TikTok",
x = "Artista",
y = "Numero de Reproducciones") +
#scale_y_continuous(labels = scales::comma) +
theme_minimal()