Importamos las librerías necesarias:
library(pacman)
p_load(stringi, tidyverse, readr)
source('../lib/data-access.R')
source('../lib/plot.R')

Consultamos los track features del top 10:
# Field projection passed to find(): "_id" is switched off and every field
# listed as true is requested. The string is forwarded verbatim.
# NOTE(review): looks like a MongoDB-style projection (e.g. mongolite) -- confirm
# against the helper that builds the collection handle.
track_fields_projection <- '{
"_id": false,
"position": true,
"week_start": true,
"week_end": true,
"reproductions": true,
"name": true,
"artist": true,
"album_id": true,
"album": true,
"number": true,
"disc_number": true,
"album_release_date": true,
"danceability": true,
"energy": true,
"loudness": true,
"speechiness": true,
"acousticness": true,
"instrumentalness": true,
"liveness": true,
"valence": true,
"explicit": true,
"tempo": true,
"time_signature": true,
"duration_ms": true,
"key": true,
"mode": true
}'

# get_collection() is not defined in this chunk (presumably it comes from
# ../lib/data-access.R -- confirm); it returns the handle that is queried below.
top_track_features_collection <- get_collection('track_features_top_10')

# Fetch the projected fields for the top-10 track features.
top_track_features <- top_track_features_collection$find(
  fields = track_fields_projection
)

# Convert the dates to Date type so that they can be compared:
# Parse the week boundaries into Date values so they can be compared.
top_track_features$week_start <- as.Date(
  top_track_features$week_start,
  format = "%Y-%m-%d"
)
# Fix: week_end is parsed from its own column. It was previously built from
# week_start, which made both columns identical.
# Assumes week_end is stored in the same "%Y-%m-%d" text format as week_start
# -- TODO confirm against the source collection.
top_track_features$week_end <- as.Date(
  top_track_features$week_end,
  format = "%Y-%m-%d"
)

# Inspect the resulting structure.
# NOTE: the recorded output that follows was captured before the week_end fix,
# so its week_end values still mirror week_start.
str(top_track_features)
## 'data.frame': 1148 obs. of 25 variables:
## $ position : int 10 8 9 3 6 1 9 4 1 10 ...
## $ week_start : Date, format: "2018-12-28" "2019-04-26" ...
## $ week_end : Date, format: "2018-12-28" "2019-04-26" ...
## $ reproductions : int 17560600 20230172 19467987 23468009 22404832 39419339 21577655 32309199 38174455 17215322 ...
## $ name : chr "Calma - Remix" "7 rings" "Soltera - Remix" "All I Want for Christmas Is You" ...
## $ artist : chr "Pedro Capó" "Ariana Grande" "Bad Bunny" "Mariah Carey" ...
## $ album_id : chr "1tFnP9PwIMeMIuj92mfswZ" "2fYhqwDWXjbpjaIJPEfKFw" "2m9Vuc9Q19qhSm6RQmBgsR" "61ulfFSmmxMhc2wCdmdMkN" ...
## $ album : chr "Calma (Remix)" "thank u, next" "Soltera (Remix)" "Merry Christmas" ...
## $ number : int 1 10 1 2 1 9 1 1 10 1 ...
## $ disc_number : int 1 1 1 1 1 2 1 1 1 1 ...
## $ album_release_date: chr "2018-10-05" "2019-02-08" "2019-05-10" "1994-11-01" ...
## $ danceability : num 0.826 0.778 0.795 0.336 0.785 0.835 0.863 0.621 0.778 0.571 ...
## $ energy : num 0.773 0.317 0.783 0.627 0.721 0.626 0.666 0.601 0.317 0.693 ...
## $ loudness : num -4.22 -10.73 -4.27 -7.46 -5.46 ...
## $ speechiness : num 0.0524 0.334 0.0432 0.0384 0.0506 0.125 0.152 0.148 0.334 0.0545 ...
## $ acousticness : num 0.323 0.592 0.361 0.164 0.0149 0.0589 0.212 0.0522 0.592 0.00536 ...
## $ instrumentalness : num 0 0 0 0 0.00432 0.00006 0.000493 0 0 0 ...
## $ liveness : num 0.143 0.0881 0.437 0.0708 0.285 0.396 0.103 0.46 0.0881 0.173 ...
## $ valence : num 0.761 0.327 0.799 0.35 0.894 0.35 0.838 0.457 0.327 0.393 ...
## $ explicit : logi FALSE TRUE FALSE FALSE TRUE TRUE ...
## $ tempo : num 127 140 92 150 122 ...
## $ time_signature : int 4 4 4 4 4 4 4 5 4 4 ...
## $ duration_ms : int 238200 178626 266086 241106 176218 217925 178946 163636 178626 232253 ...
## $ key : chr "B" "C#" "F" "G" ...
## $ mode : chr "minor" "minor" "major" "major" ...
Separamos los features numéricos:
# Keep only the numeric columns (integer and double) of the track features.
# select(where(...)) replaces the superseded select_if(); requires dplyr >= 1.0.0.
track_features.num <- top_track_features %>%
  select(where(is.numeric))

# Normalize: apply scale() to every numeric column. With its default arguments
# scale() centers each column and divides it by its standard deviation.
# mutate(across(...)) replaces the superseded mutate_all(); as before, each
# resulting column is the one-column matrix returned by scale().
track_features.num.scaled <- track_features.num %>%
  mutate(across(everything(), scale))