
# instalar paquete readr
# install.packages("readr")
# cargar paquete readr
library(readr)
# file.choose()
# Importar Bases de Datos
library("DataExplorer")
# Load the "Most Streamed Spotify Songs 2024" data set into `spotify`.
#
# The CSV location can be overridden without editing the script by setting the
# SPOTIFY_CSV environment variable; when it is unset, the original local path
# is used, so existing runs keep working.
spotify_csv <- Sys.getenv(
  "SPOTIFY_CSV",
  unset = "C:\\Users\\sebas\\OneDrive\\Escritorio\\Inteligencia de Negocios\\Modulo 2\\12 08 2024\\Most Streamed Spotify Songs 2024.csv"
)
if (!file.exists(spotify_csv)) {
  stop(
    "Spotify CSV not found: '", spotify_csv, "'. ",
    "Set the SPOTIFY_CSV environment variable to the file location.",
    call. = FALSE
  )
}
spotify <- read.csv(spotify_csv)

# Many count columns are written with thousands separators ("390,470,936"),
# so read.csv() imports them as character and leaves blank cells as "" rather
# than NA. Downstream, that makes them look like high-cardinality categorical
# columns and hides their missing values. Convert every character column whose
# non-blank values are all numbers (and at least one uses a grouping comma) to
# numeric; blank cells become NA. Text columns such as track names or dates do
# not match the pattern and are left untouched.
# NOTE: the console output pasted further down in this script was captured
# before this conversion, so it still shows these columns as text.
grouped_number <- "^-?([0-9]+|[0-9]{1,3}(,[0-9]{3})+)(\\.[0-9]+)?$"
is_grouped_numeric <- vapply(
  spotify,
  function(column) {
    if (!is.character(column)) {
      return(FALSE)
    }
    values <- trimws(column)
    values <- values[!is.na(values) & nzchar(values)]
    length(values) > 0 &&
      any(grepl(",", values, fixed = TRUE)) &&
      all(grepl(grouped_number, values))
  },
  logical(1)
)
spotify[is_grouped_numeric] <- lapply(
  spotify[is_grouped_numeric],
  function(column) {
    digits <- gsub(",", "", trimws(column), fixed = TRUE)
    digits[!nzchar(digits)] <- NA_character_
    as.numeric(digits)
  }
)
# Create data sets
# Preview the first rows of the data set to check column names and raw values
# (the captured output below shows six rows).
head(spotify)
## Track Album.Name Artist
## 1 MILLION DOLLAR BABY Million Dollar Baby - Single Tommy Richman
## 2 Not Like Us Not Like Us Kendrick Lamar
## 3 i like the way you kiss me I like the way you kiss me Artemas
## 4 Flowers Flowers - Single Miley Cyrus
## 5 Houdini Houdini Eminem
## 6 Lovin On Me Lovin On Me Jack Harlow
## Release.Date ISRC All.Time.Rank Track.Score Spotify.Streams
## 1 4/26/2024 QM24S2402528 1 725.4 390,470,936
## 2 5/4/2024 USUG12400910 2 545.9 323,703,884
## 3 3/19/2024 QZJ842400387 3 538.4 601,309,283
## 4 1/12/2023 USSM12209777 4 444.9 2,031,280,633
## 5 5/31/2024 USUG12403398 5 423.3 107,034,922
## 6 11/10/2023 USAT22311371 6 410.1 670,665,438
## Spotify.Playlist.Count Spotify.Playlist.Reach Spotify.Popularity
## 1 30,716 196,631,588 92
## 2 28,113 174,597,137 92
## 3 54,331 211,607,669 92
## 4 269,802 136,569,078 85
## 5 7,223 151,469,874 88
## 6 105,892 175,421,034 83
## YouTube.Views YouTube.Likes TikTok.Posts TikTok.Likes TikTok.Views
## 1 84,274,754 1,713,126 5,767,700 651,565,900 5,332,281,936
## 2 116,347,040 3,486,739 674,700 35,223,547 208,339,025
## 3 122,599,116 2,228,730 3,025,400 275,154,237 3,369,120,610
## 4 1,096,100,899 10,629,796 7,189,811 1,078,757,968 14,603,725,994
## 5 77,373,957 3,670,188 16,400
## 6 131,148,091 1,392,593 4,202,367 214,943,489 2,938,686,633
## YouTube.Playlist.Reach Apple.Music.Playlist.Count AirPlay.Spins
## 1 150,597,040 210 40,975
## 2 156,380,351 188 40,778
## 3 373,784,955 190 74,333
## 4 3,351,188,582 394 1,474,799
## 5 112,763,851 182 12,185
## 6 2,867,222,632 138 522,042
## SiriusXM.Spins Deezer.Playlist.Count Deezer.Playlist.Reach
## 1 684 62 17,598,718
## 2 3 67 10,422,430
## 3 536 136 36,321,847
## 4 2,182 264 24,684,248
## 5 1 82 17,660,624
## 6 4,654 86 17,167,254
## Amazon.Playlist.Count Pandora.Streams Pandora.Track.Stations
## 1 114 18,004,655 22,931
## 2 111 7,780,028 28,444
## 3 172 5,022,621 5,639
## 4 210 190,260,277 203,384
## 5 105 4,493,884 7,006
## 6 152 138,529,362 50,982
## Soundcloud.Streams Shazam.Counts TIDAL.Popularity Explicit.Track
## 1 4,818,457 2,669,262 NA 0
## 2 6,623,075 1,118,279 NA 1
## 3 7,208,651 5,285,340 NA 0
## 4 11,822,942 NA 0
## 5 207,179 457,017 NA 1
## 6 9,438,601 4,517,131 NA 1
# Review the data set report
# Per the original note, create_report() builds a baseline report of the data
# set. The call is left commented out, so no report is generated on a run.
#create_report(spotify)
# Data set charts
# introduce() prints a one-row summary table rather than a chart: row and
# column counts, discrete vs. continuous columns, missing values, complete
# rows and memory usage (see the captured output below).
introduce(spotify)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1 4600 29 22 6 1
## total_missing_values complete_rows total_observations memory_usage
## 1 7941 0 133400 5679272
# Chart the basic data set overview (presumably the same metrics that
# introduce() reports -- confirm against the DataExplorer documentation).
plot_intro(spotify)

# Box plots are disabled; DataExplorer's plot_boxplot() normally also needs a
# `by` column -- TODO confirm before re-enabling.
# plot_boxplot(spotify)
# Chart the missing values found in the data set.
plot_missing(spotify)

# Draw histograms for the data set (presumably only the columns DataExplorer
# treats as continuous -- confirm).
plot_histogram(spotify)

# Draw bar charts for the discrete columns. Columns with more than 50
# categories are skipped and listed in a message (22 of them in the captured
# run below).
plot_bar(spotify)
## 22 columns ignored with more than 50 categories.
## Track: 4370 categories
## Album.Name: 4005 categories
## Artist: 2000 categories
## Release.Date: 1562 categories
## ISRC: 4598 categories
## All.Time.Rank: 4577 categories
## Spotify.Streams: 4426 categories
## Spotify.Playlist.Count: 4208 categories
## Spotify.Playlist.Reach: 4479 categories
## YouTube.Views: 4291 categories
## YouTube.Likes: 4284 categories
## TikTok.Posts: 3319 categories
## TikTok.Likes: 3616 categories
## TikTok.Views: 3617 categories
## YouTube.Playlist.Reach: 3459 categories
## AirPlay.Spins: 3268 categories
## SiriusXM.Spins: 690 categories
## Deezer.Playlist.Reach: 3559 categories
## Pandora.Streams: 3492 categories
## Pandora.Track.Stations: 2976 categories
## Soundcloud.Streams: 1266 categories
## Shazam.Counts: 4003 categories

# Draw the correlation chart for the data set. In the captured run below,
# DataExplorer warns that all discrete features were ignored because of the
# 20-category `maxcat` limit, so those columns do not contribute to the chart.
plot_correlation(spotify)
## Warning in dummify(data, maxcat = maxcat): Ignored all discrete features since
## `maxcat` set to 20 categories!
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_text()`).
