library("DataExplorer")
# Load the "Most Streamed Spotify Songs 2024" CSV into `df`, then print a
# compact column-by-column overview (type plus first values) of the result.
csv_path <- "C:\\Users\\mari0\\OneDrive\\Documents\\R Studio\\IA con impacto empresarial\\Modulo 2\\BD\\Most Streamed Spotify Songs 2024.csv"
df <- utils::read.csv(file = csv_path)
utils::str(object = df)
## 'data.frame': 4600 obs. of 29 variables:
## $ Track : chr "MILLION DOLLAR BABY" "Not Like Us" "i like the way you kiss me" "Flowers" ...
## $ Album.Name : chr "Million Dollar Baby - Single" "Not Like Us" "I like the way you kiss me" "Flowers - Single" ...
## $ Artist : chr "Tommy Richman" "Kendrick Lamar" "Artemas" "Miley Cyrus" ...
## $ Release.Date : chr "4/26/2024" "5/4/2024" "3/19/2024" "1/12/2023" ...
## $ ISRC : chr "QM24S2402528" "USUG12400910" "QZJ842400387" "USSM12209777" ...
## $ All.Time.Rank : chr "1" "2" "3" "4" ...
## $ Track.Score : num 725 546 538 445 423 ...
## $ Spotify.Streams : chr "390,470,936" "323,703,884" "601,309,283" "2,031,280,633" ...
## $ Spotify.Playlist.Count : chr "30,716" "28,113" "54,331" "269,802" ...
## $ Spotify.Playlist.Reach : chr "196,631,588" "174,597,137" "211,607,669" "136,569,078" ...
## $ Spotify.Popularity : int 92 92 92 85 88 83 86 92 NA 86 ...
## $ YouTube.Views : chr "84,274,754" "116,347,040" "122,599,116" "1,096,100,899" ...
## $ YouTube.Likes : chr "1,713,126" "3,486,739" "2,228,730" "10,629,796" ...
## $ TikTok.Posts : chr "5,767,700" "674,700" "3,025,400" "7,189,811" ...
## $ TikTok.Likes : chr "651,565,900" "35,223,547" "275,154,237" "1,078,757,968" ...
## $ TikTok.Views : chr "5,332,281,936" "208,339,025" "3,369,120,610" "14,603,725,994" ...
## $ YouTube.Playlist.Reach : chr "150,597,040" "156,380,351" "373,784,955" "3,351,188,582" ...
## $ Apple.Music.Playlist.Count: int 210 188 190 394 182 138 280 160 NA 191 ...
## $ AirPlay.Spins : chr "40,975" "40,778" "74,333" "1,474,799" ...
## $ SiriusXM.Spins : chr "684" "3" "536" "2,182" ...
## $ Deezer.Playlist.Count : int 62 67 136 264 82 86 168 87 NA 78 ...
## $ Deezer.Playlist.Reach : chr "17,598,718" "10,422,430" "36,321,847" "24,684,248" ...
## $ Amazon.Playlist.Count : int 114 111 172 210 105 152 154 53 NA 92 ...
## $ Pandora.Streams : chr "18,004,655" "7,780,028" "5,022,621" "190,260,277" ...
## $ Pandora.Track.Stations : chr "22,931" "28,444" "5,639" "203,384" ...
## $ Soundcloud.Streams : chr "4,818,457" "6,623,075" "7,208,651" "" ...
## $ Shazam.Counts : chr "2,669,262" "1,118,279" "5,285,340" "11,822,942" ...
## $ TIDAL.Popularity : logi NA NA NA NA NA NA ...
## $ Explicit.Track : int 0 1 0 0 1 1 0 1 1 1 ...
# Summarise basic metadata of `df`: row/column counts, discrete vs. continuous
# columns, missing values, complete rows and memory usage.
DataExplorer::introduce(data = df)
## rows columns discrete_columns continuous_columns all_missing_columns
## 1 4600 29 22 6 1
## total_missing_values complete_rows total_observations memory_usage
## 1 7941 0 133400 5679272
# Exploratory plots and automated report ----
#
# read.csv() keeps most streaming metrics as character because their values
# carry thousands separators (e.g. "390,470,936"), and it keeps blank cells as
# "" rather than NA. Plotting the raw frame therefore leaves those metrics out
# of the histograms and the correlation matrix and under-counts missing
# values. A cleaned copy is built for plotting; `df` itself is left untouched.

# Optionally signed number with optional "," group separators and an optional
# decimal part.
formatted_number_pattern <- "^-?[0-9][0-9,]*(\\.[0-9]+)?$"

# TRUE when `x` is a character vector whose non-missing values all look like
# formatted numbers and at least one such value exists. A single value that
# does not match keeps the whole column as text.
is_formatted_number <- function(x) {
  if (!is.character(x)) {
    return(FALSE)
  }
  present <- trimws(x[!is.na(x)])
  length(present) > 0 && all(grepl(formatted_number_pattern, present))
}

# Convert formatted-number text such as "1,474,799" to numeric.
parse_formatted_number <- function(x) {
  as.numeric(gsub(",", "", trimws(x), fixed = TRUE))
}

df_plot <- df

# Blank text cells become NA so they are counted as missing.
text_cols <- names(df_plot)[vapply(df_plot, is.character, logical(1))]
for (col in text_cols) {
  values <- df_plot[[col]]
  values[!is.na(values) & !nzchar(trimws(values))] <- NA_character_
  df_plot[[col]] <- values
}

# Text columns that only hold formatted numbers become numeric.
numeric_like <- names(df_plot)[vapply(df_plot, is_formatted_number, logical(1))]
for (col in numeric_like) {
  df_plot[[col]] <- parse_formatted_number(df_plot[[col]])
}

plot_intro(df_plot)
plot_missing(df_plot)
plot_histogram(df_plot)
plot_bar(df_plot)
# The metrics contain NA values; pairwise-complete observations keep those
# columns from producing NA correlations.
plot_correlation(df_plot, cor_args = list(use = "pairwise.complete.obs"))
create_report(df_plot)
##
##
## processing file: report.rmd
## | | | 0% | |. | 2% | |.. | 5% [global_options] | |... | 7% | |.... | 10% [introduce] | |.... | 12% | |..... | 14% [plot_intro] | |...... | 17% | |....... | 19% [data_structure] | |........ | 21% | |......... | 24% [missing_profile] | |.......... | 26% | |........... | 29% [univariate_distribution_header] | |........... | 31% | |............ | 33% [plot_histogram] | |............. | 36% | |.............. | 38% [plot_density] | |............... | 40% | |................ | 43% [plot_frequency_bar] | |................. | 45% | |.................. | 48% [plot_response_bar] | |.................. | 50% | |................... | 52% [plot_with_bar] | |.................... | 55% | |..................... | 57% [plot_normal_qq] | |...................... | 60% | |....................... | 62% [plot_response_qq] | |........................ | 64% | |......................... | 67% [plot_by_qq] | |.......................... | 69% | |.......................... | 71% [correlation_analysis] | |........................... | 74% | |............................ | 76% [principal_component_analysis] | |............................. | 79% | |.............................. | 81% [bivariate_distribution_header] | |............................... | 83% | |................................ | 86% [plot_response_boxplot] | |................................. | 88% | |................................. | 90% [plot_by_boxplot] | |.................................. | 93% | |................................... | 95% [plot_response_scatterplot] | |.................................... | 98% | |.....................................| 100% [plot_by_scatterplot]
## output file: C:/Users/mari0/OneDrive/Documents/R Studio/IA con impacto empresarial/Modulo 2/report.knit.md
## "C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/pandoc" +RTS -K512m -RTS "C:\Users\mari0\OneDrive\DOCUME~1\RSTUDI~1\IACONI~1\MODULO~1\REPORT~1.MD" --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output pandoc41c0156b6b22.html --lua-filter "C:\Users\mari0\AppData\Local\R\win-library\4.4\rmarkdown\rmarkdown\lua\pagebreak.lua" --lua-filter "C:\Users\mari0\AppData\Local\R\win-library\4.4\rmarkdown\rmarkdown\lua\latex-div.lua" --embed-resources --standalone --variable bs3=TRUE --section-divs --table-of-contents --toc-depth 6 --template "C:\Users\mari0\AppData\Local\R\win-library\4.4\rmarkdown\rmd\h\default.html" --no-highlight --variable highlightjs=1 --variable theme=yeti --mathjax --variable "mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML" --include-in-header "C:\Users\mari0\AppData\Local\Temp\RtmpoZQsi4\rmarkdown-str41c070d25874.html"
##
## Output created: report.html
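Este código realiza una exploración inicial del conjunto de datos de las canciones más reproducidas en Spotify en 2024. Primero se carga el archivo CSV y se inspecciona su estructura; después se generan visualizaciones que incluyen un resumen introductorio de los datos, un gráfico de valores faltantes, histogramas, gráficos de barras y una matriz de correlación, y por último se crea automáticamente un informe HTML (report.html). En conjunto, esto proporciona una visión general del contenido y la calidad del conjunto de datos.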