library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.3.2
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'ggplot2' was built under R version 3.3.2
## Warning: package 'tibble' was built under R version 3.3.2
## Warning: package 'tidyr' was built under R version 3.3.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Make the black-and-white theme the default for every ggplot below.
theme_set(theme_bw())

# Song-level data, one row per track. Every column is read as character,
# so identifiers such as playlist_id are strings, not numbers.
tracks <- read_csv(
  file = "alltracks_used-collapsed-song_by_song.csv",
  col_types = cols(.default = col_character())
)

# Transition-level data (source/target columns), also read as all-character.
trace <- read_csv(
  file = "transitions-mood-sound-gender-collaps.csv",
  col_types = cols(.default = col_character())
)
# Long-format view of the track features, restricted to playlists with at
# least 5 tracks: one row per (track, feature), with the feature name in
# `mood` and its label in `value`.
tracks.long <- tracks %>%
  group_by(playlist_id) %>%
  filter(n() >= 5) %>%
  # The grouping is only needed for the size filter above. Drop it so the
  # result is a plain tibble and later verbs do not silently run per playlist.
  ungroup() %>%
  gather(
    key = "mood",
    value = "value",
    happy_sad,
    aggressive_relaxed,
    acoustic_electronic,
    timbre_value,
    voice_instrumental_value
  )
Feature co-occurrence
library(vcd)
## Loading required package: grid
# Contingency table of aggressive/relaxed (rows) against happy/sad (columns)
# over all tracks. The two dimension-specific "neither_*" labels are
# shortened to "neither"; every other value is left untouched.
tah <- tracks %>%
  select(aggressive_relaxed, happy_sad) %>%
  mutate(
    happy_sad = recode(happy_sad, neither_happy_sad = "neither"),
    aggressive_relaxed = recode(
      aggressive_relaxed,
      neither_aggressive_relaxed = "neither"
    )
  ) %>%
  table()

# Render the counts as a markdown table.
knitr::kable(tah)
| | happy | neither | sad |
|---|---|---|---|
| aggressive | 8145 | 10629 | 19 |
| neither | 9827 | 10761 | 1567 |
| relaxed | 12925 | 35848 | 14169 |
# Mosaic plot (vcd) of the same aggressive/relaxed x happy/sad table.
mosaic(tah)
# library(ggparallel)
#
# ggparallel(names(tracks)[11:12],
# data = tracks,
# method = "parset")
#
# ggparallel(list("gear", "cyl"), data=mtcars)
For each playlist with at least 5 songs, we’ll look at the proportion of its songs that are aggressive, relaxed, sad or happy.
# One row per (source, playlist_id) for playlists with at least 5 tracks.
# Each mood column is the share of the playlist's tracks carrying that label.
playlists.wide <- tracks %>%
  group_by(source, playlist_id) %>%
  filter(n() >= 5) %>%
  summarise(
    aggressive = sum(aggressive_relaxed == "aggressive") / n(),
    relaxed = sum(aggressive_relaxed == "relaxed") / n(),
    sad = sum(happy_sad == "sad") / n(),
    happy = sum(happy_sad == "happy") / n()
  ) %>%
  # summarise() drops only the last grouping level, so without this the
  # result would stay grouped by `source` and leak into downstream code.
  ungroup()
# Long version of playlists.wide: one row per (playlist, mood), with the
# proportion in `prop`. The identifier columns are kept as they are.
playlists <- gather(
  playlists.wide,
  key = "mood",
  value = "prop",
  -playlist_id,
  -source
)
library(GGally)
##
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
##
## nasa
# Scatterplot matrix of the four per-playlist mood proportions.
# Grouping is removed first so the identifier columns can be dropped.
ggpairs(
  data = select(ungroup(playlists.wide), -playlist_id, -source),
  lower = list(
    # small, translucent markers to cope with overplotting
    continuous = wrap("points", alpha = 0.3, size = 0.1),
    combo = wrap("dot", alpha = 0.4, size = 0.2)
  )
)
Parallel coordinates. Each playlist is a line.
# One nearly transparent line per playlist across the mood categories,
# so that dense regions show up as darker bands.
ggplot(playlists, aes(x = mood, y = prop, group = playlist_id)) +
  geom_line(colour = "darkorange", size = 0.5, alpha = 0.01)
Densities
# playlists %>%
# ggplot(aes(x = mood, y = prop)) +
# geom_violin()
# Distribution of the per-playlist proportion for each mood, one panel
# (row) per mood.
ggplot(playlists, aes(x = prop, fill = mood)) +
  geom_density(adjust = 1) +
  facet_grid(mood ~ .)
# Feature-by-track tile map for a single example playlist: one row of tiles
# per feature, coloured by that feature's label.
tracks.long %>%
  # playlist_id is read as character, so compare against a string rather
  # than relying on implicit number-to-string coercion.
  # (10229 is another playlist that was tried here.)
  filter(playlist_id == "10338") %>%
  group_by(mood) %>%
  # Position of each row within its feature group, in data order.
  # row_number() is also well defined when a group is empty, unlike 1:n().
  mutate(track = row_number()) %>%
  ggplot(aes(x = track, y = mood, fill = value)) +
  geom_tile() +
  scale_fill_brewer(type = "qual")
Proportion of transitions that involve a change, per playlist:
# Per playlist, the fraction of transitions whose source and target labels
# differ, computed separately for mood, sound and genre.
change_proportion <- trace %>%
  group_by(playlist_id) %>%
  summarise(
    mood_change = mean(mood_source != mood_target),
    sound_change = mean(sound_source != sound_target),
    genre_change = mean(genre_source != genre_target)
  )
# Scatterplot matrix of the three per-playlist change proportions.
# (Disabled alternative: reshape to long format first.)
# gather(key = "change", value = "proportion", -playlist_id)
ggpairs(
  data = select(change_proportion, -playlist_id),
  lower = list(
    continuous = wrap("points", alpha = 0.3, size = 0.2),
    combo = wrap("dot", alpha = 0.4, size = 0.2)
  )
)
What transitions happen most often?
# The 30 most frequent "source>target" transitions, shown as a dot plot.
# Mood, sound and genre transitions are stacked into a single `change`
# column and counted together; `type` is not part of the count key.
trace %>%
  transmute(
    playlist_id,
    mood_change = paste(mood_source, mood_target, sep = ">"),
    sound_change = paste(sound_source, sound_target, sep = ">"),
    genre_change = paste(genre_source, genre_target, sep = ">")
  ) %>%
  gather(key = "type", value = "change", -playlist_id) %>%
  count(change) %>%
  arrange(desc(n)) %>%
  slice(1:30) %>%
  ggplot(aes(x = reorder(change, n), y = n)) +
  geom_point() +
  # scale_y_log10() +
  coord_flip() # labels on the vertical axis, most frequent at the top