# instalar paquete readr
# install.packages("readr")
 
# cargar paquete readr
library(readr)
# file.choose()
# Importar Bases de Datos
library("DataExplorer")

# Load the "Most Streamed Spotify Songs 2024" CSV into `spotify`.
# The location can be overridden through the SPOTIFY_CSV environment variable
# so the script also runs on other machines; the original path is the default.
spotify_csv <- Sys.getenv(
  "SPOTIFY_CSV",
  unset = "C:\\Users\\sebas\\OneDrive\\Escritorio\\Inteligencia de Negocios\\Modulo 2\\12 08 2024\\Most Streamed Spotify Songs 2024.csv"
)
if (!file.exists(spotify_csv)) {
  stop("Spotify CSV not found: ", spotify_csv, call. = FALSE)
}
spotify <- read.csv(spotify_csv)

# The count columns use "," as a thousands separator (e.g. "390,470,936"), so
# read.csv() imports them as text and they end up treated as categories rather
# than numbers. Convert every text column whose non-blank values are all
# comma-grouped numbers to numeric; blank cells become NA.
# NOTE: any `##` output transcripts kept in this file were recorded before this
# conversion and still show the comma-formatted text values.
grouped_number <- "^-?[0-9][0-9,]*(\\.[0-9]+)?$"
has_grouped_numbers <- vapply(
  spotify,
  function(column) {
    if (!is.character(column) && !is.factor(column)) {
      return(FALSE)
    }
    filled <- trimws(as.character(column[!is.na(column)]))
    filled <- filled[nzchar(filled)]
    length(filled) > 0 && all(grepl(grouped_number, filled))
  },
  logical(1)
)
# as.numeric() rather than as.integer(): some counts exceed the 32-bit integer
# range (e.g. 14,603,725,994 TikTok views).
spotify[has_grouped_numbers] <- lapply(
  spotify[has_grouped_numbers],
  function(column) {
    as.numeric(gsub(",", "", trimws(as.character(column)), fixed = TRUE))
  }
)

# Inspect the data set
# Show the first six rows of the data set
utils::head(spotify, n = 6L)
##                        Track                   Album.Name         Artist
## 1        MILLION DOLLAR BABY Million Dollar Baby - Single  Tommy Richman
## 2                Not Like Us                  Not Like Us Kendrick Lamar
## 3 i like the way you kiss me   I like the way you kiss me        Artemas
## 4                    Flowers             Flowers - Single    Miley Cyrus
## 5                    Houdini                      Houdini         Eminem
## 6                Lovin On Me                  Lovin On Me    Jack Harlow
##   Release.Date         ISRC All.Time.Rank Track.Score Spotify.Streams
## 1    4/26/2024 QM24S2402528             1       725.4     390,470,936
## 2     5/4/2024 USUG12400910             2       545.9     323,703,884
## 3    3/19/2024 QZJ842400387             3       538.4     601,309,283
## 4    1/12/2023 USSM12209777             4       444.9   2,031,280,633
## 5    5/31/2024 USUG12403398             5       423.3     107,034,922
## 6   11/10/2023 USAT22311371             6       410.1     670,665,438
##   Spotify.Playlist.Count Spotify.Playlist.Reach Spotify.Popularity
## 1                 30,716            196,631,588                 92
## 2                 28,113            174,597,137                 92
## 3                 54,331            211,607,669                 92
## 4                269,802            136,569,078                 85
## 5                  7,223            151,469,874                 88
## 6                105,892            175,421,034                 83
##   YouTube.Views YouTube.Likes TikTok.Posts  TikTok.Likes   TikTok.Views
## 1    84,274,754     1,713,126    5,767,700   651,565,900  5,332,281,936
## 2   116,347,040     3,486,739      674,700    35,223,547    208,339,025
## 3   122,599,116     2,228,730    3,025,400   275,154,237  3,369,120,610
## 4 1,096,100,899    10,629,796    7,189,811 1,078,757,968 14,603,725,994
## 5    77,373,957     3,670,188       16,400                             
## 6   131,148,091     1,392,593    4,202,367   214,943,489  2,938,686,633
##   YouTube.Playlist.Reach Apple.Music.Playlist.Count AirPlay.Spins
## 1            150,597,040                        210        40,975
## 2            156,380,351                        188        40,778
## 3            373,784,955                        190        74,333
## 4          3,351,188,582                        394     1,474,799
## 5            112,763,851                        182        12,185
## 6          2,867,222,632                        138       522,042
##   SiriusXM.Spins Deezer.Playlist.Count Deezer.Playlist.Reach
## 1            684                    62            17,598,718
## 2              3                    67            10,422,430
## 3            536                   136            36,321,847
## 4          2,182                   264            24,684,248
## 5              1                    82            17,660,624
## 6          4,654                    86            17,167,254
##   Amazon.Playlist.Count Pandora.Streams Pandora.Track.Stations
## 1                   114      18,004,655                 22,931
## 2                   111       7,780,028                 28,444
## 3                   172       5,022,621                  5,639
## 4                   210     190,260,277                203,384
## 5                   105       4,493,884                  7,006
## 6                   152     138,529,362                 50,982
##   Soundcloud.Streams Shazam.Counts TIDAL.Popularity Explicit.Track
## 1          4,818,457     2,669,262               NA              0
## 2          6,623,075     1,118,279               NA              1
## 3          7,208,651     5,285,340               NA              0
## 4                       11,822,942               NA              0
## 5            207,179       457,017               NA              1
## 6          9,438,601     4,517,131               NA              1
# Review the data set report
# Builds a baseline report of the data set (left disabled)
# create_report(spotify)
# Data set overview
# Summary table: row/column counts, column types, missing values, memory usage
DataExplorer::introduce(data = spotify)
##   rows columns discrete_columns continuous_columns all_missing_columns
## 1 4600      29               22                  6                   1
##   total_missing_values complete_rows total_observations memory_usage
## 1                 7941             0             133400      5679272
# Chart of the basic data set metrics
DataExplorer::plot_intro(data = spotify)

# plot_boxplot(spotify)
# Missing-value profile of each column
DataExplorer::plot_missing(data = spotify)

# Histograms of the continuous columns
DataExplorer::plot_histogram(data = spotify)

# Bar charts of the discrete columns (high-cardinality columns are skipped)
DataExplorer::plot_bar(data = spotify)
## 22 columns ignored with more than 50 categories.
## Track: 4370 categories
## Album.Name: 4005 categories
## Artist: 2000 categories
## Release.Date: 1562 categories
## ISRC: 4598 categories
## All.Time.Rank: 4577 categories
## Spotify.Streams: 4426 categories
## Spotify.Playlist.Count: 4208 categories
## Spotify.Playlist.Reach: 4479 categories
## YouTube.Views: 4291 categories
## YouTube.Likes: 4284 categories
## TikTok.Posts: 3319 categories
## TikTok.Likes: 3616 categories
## TikTok.Views: 3617 categories
## YouTube.Playlist.Reach: 3459 categories
## AirPlay.Spins: 3268 categories
## SiriusXM.Spins: 690 categories
## Deezer.Playlist.Reach: 3559 categories
## Pandora.Streams: 3492 categories
## Pandora.Track.Stations: 2976 categories
## Soundcloud.Streams: 1266 categories
## Shazam.Counts: 4003 categories

# Correlation heatmap of the continuous features.
# - type = "continuous": with the default ("all") the discrete columns are run
#   through dummify() and dropped with a warning when they exceed `maxcat`
#   categories, so restrict the plot to numeric columns up front.
# - use = "pairwise.complete.obs": cor() defaults to use = "everything", which
#   returns NA for every pair involving a column with missing values and
#   leaves those cells without a label; compute each pair from the rows where
#   both columns are observed instead.
# NOTE: warnings recorded in any `##` transcript after this call come from the
# earlier default-argument run.
plot_correlation(
  spotify,
  type = "continuous",
  cor_args = list(use = "pairwise.complete.obs")
)
## Warning in dummify(data, maxcat = maxcat): Ignored all discrete features since
## `maxcat` set to 20 categories!
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_text()`).