
# Tip: run file.choose() interactively to locate the CSV on another machine.
# Install the package once if it is missing, then load it.
# install.packages("DataExplorer", repos = "http://cran.us.r-project.org")
library("DataExplorer")

# Location of the dataset; kept in one place so it is easy to change.
csv_path <- "C:/Users/gabri/Downloads/Tec/Sem 7/Modulo 2/Most Streamed Spotify Songs 2024.csv"
if (!file.exists(csv_path)) {
  stop("CSV file not found: ", csv_path, call. = FALSE)
}

# Convert a character column to numeric when every non-empty value is a plain
# number once "," separators are removed; any other column is returned as is.
#
# x: one column of the data frame.
# Returns a numeric vector (empty strings become NA) or `x` unchanged.
parse_numeric_column <- function(x) {
  if (!is.character(x)) {
    return(x)
  }
  cleaned <- trimws(gsub(",", "", x, fixed = TRUE))
  cleaned[!nzchar(cleaned)] <- NA
  present <- !is.na(cleaned)
  # Leave the column alone if it is entirely empty or holds any non-number.
  if (!any(present)) {
    return(x)
  }
  number_like <- grepl("^[-+]?([0-9]+\\.?[0-9]*|\\.[0-9]+)$", cleaned[present])
  if (!all(number_like)) {
    return(x)
  }
  as.numeric(cleaned)
}

data <- read.csv(csv_path)

# The run recorded in the `##` comments further down reported count-like
# columns (e.g. Spotify.Streams, YouTube.Views) as discrete with thousands of
# categories, so the histogram, bar and correlation plots skipped them.
# Presumably the values carry thousands separators such as "1,234" and are read
# as text -- TODO confirm against the raw file. Columns that are not purely
# numeric (track names, dates, ISRC codes) are left untouched.
# NOTE: that recorded output predates this conversion, so the discrete /
# continuous column counts will differ when the script is re-run.
data[] <- lapply(data, parse_numeric_column)
# Build the full DataExplorer profiling report (HTML) for the dataset.
data |> create_report()
# Console output recorded from a previous run (abridged):
##
## processing file: report.rmd
## (knitr progress 0% -> 100%, chunks [global_options] ... [plot_by_scatterplot])
## output file: C:/Users/gabri/Downloads/Tec/Sem 7/Modulo 2/report.knit.md
## (pandoc call rendering report.knit.md to standalone html4, theme "yeti",
##  table of contents with depth 6)
##
## Output created: report.html
# Individual exploratory summaries and plots.
# Lines starting with `##` are console output recorded from a previous run.

# Basic facts about the data frame: dimensions, column types, missing values.
data |> introduce()
## rows columns discrete_columns continuous_columns all_missing_columns
## 1 4600 29 22 6 1
## total_missing_values complete_rows total_observations memory_usage
## 1 7941 0 133400 5679272

# The same overview as a chart.
data |> plot_intro()

# Missing-value profile per column.
data |> plot_missing()

# Histograms of the continuous columns.
data |> plot_histogram()

# Bar charts of the discrete columns. In the recorded run every one of the 22
# discrete columns exceeded the 50-category limit and was ignored.
data |> plot_bar()
## 22 columns ignored with more than 50 categories.
## Track: 4370 categories
## Album.Name: 4005 categories
## Artist: 2000 categories
## Release.Date: 1562 categories
## ISRC: 4598 categories
## All.Time.Rank: 4577 categories
## Spotify.Streams: 4426 categories
## Spotify.Playlist.Count: 4208 categories
## Spotify.Playlist.Reach: 4479 categories
## YouTube.Views: 4291 categories
## YouTube.Likes: 4284 categories
## TikTok.Posts: 3319 categories
## TikTok.Likes: 3616 categories
## TikTok.Views: 3617 categories
## YouTube.Playlist.Reach: 3459 categories
## AirPlay.Spins: 3268 categories
## SiriusXM.Spins: 690 categories
## Deezer.Playlist.Reach: 3559 categories
## Pandora.Streams: 3492 categories
## Pandora.Track.Stations: 2976 categories
## Soundcloud.Streams: 1266 categories
## Shazam.Counts: 4003 categories

# Correlation heatmap. In the recorded run all discrete features were dropped
# because they exceeded `maxcat` (20 categories).
data |> plot_correlation()
## Warning in dummify(data, maxcat = maxcat): Ignored all discrete features since
## `maxcat` set to 20 categories!
## Warning: Removed 28 rows containing missing values or values outside the scale range
## (`geom_text()`).

# Conclusion: The steps closely follow those of Activity 1, except that the data is loaded from a CSV file.