R Notebook

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 3.3.2

## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr

## Warning: package 'ggplot2' was built under R version 3.3.2

## Warning: package 'tibble' was built under R version 3.3.2

## Warning: package 'tidyr' was built under R version 3.3.2

## Conflicts with tidy packages ----------------------------------------------

## filter(): dplyr, stats
## lag():    dplyr, stats

# Make the black-and-white theme the default for the ggplot2 plots that follow.
theme_set(theme_bw())

# Load both inputs with every column parsed as character, so readr does no
# type guessing (ids and labels stay exactly as written in the files).
tracks = read_csv(
    "alltracks_used-collapsed-song_by_song.csv",
    col_types = cols(.default = col_character())
)
trace = read_csv(
    "transitions-mood-sound-gender-collaps.csv",
    col_types = cols(.default = col_character())
)

Tracks

# Long-format track features, restricted to playlists with at least 5 rows in
# `tracks`. Each remaining row of `tracks` becomes five rows, one per gathered
# feature column; the column name goes to `mood` and its label to `value`.
tracks.long = tracks %>%
    group_by(playlist_id) %>%
    filter(n() >= 5) %>%
    # The grouping is only needed for the size filter. Drop it here so the
    # stored table does not silently carry a per-playlist grouping into
    # whatever is done with it later.
    ungroup() %>%
    gather(key = "mood",
           value = "value",
           happy_sad,
           aggressive_relaxed,
           acoustic_electronic,
           timbre_value,
           voice_instrumental_value)

Feature co-occurrence

library(vcd)

## Loading required package: grid

# Contingency table of aggressive/relaxed (rows) against happy/sad (columns).
# Both "neither_*" labels are shortened to plain "neither" before counting.
tah = tracks %>%
    transmute(
        aggressive_relaxed = ifelse(
            aggressive_relaxed == "neither_aggressive_relaxed",
            "neither",
            aggressive_relaxed
        ),
        happy_sad = ifelse(
            happy_sad == "neither_happy_sad",
            "neither",
            happy_sad
        )
    ) %>%
    table()

# Render the counts as a formatted table.
knitr::kable(tah)

	happy	neither	sad
aggressive	8145	10629	19
neither	9827	10761	1567
relaxed	12925	35848	14169

# Mosaic plot of the same contingency table (vcd).
mosaic(tah)

# library(ggparallel)
# 
# ggparallel(names(tracks)[11:12], 
#            data = tracks, 
#            method = "parset")
# 
# ggparallel(list("gear", "cyl"), data=mtcars)

Playlists

For each playlist with at least 5 songs, we’ll look at the proportion of its songs that are aggressive, relaxed, sad, or happy.

# Per-playlist mood profile: for every (source, playlist_id) pair with at
# least 5 rows in `tracks`, the share of its rows labelled aggressive,
# relaxed, sad and happy. One row per playlist, one column per mood.
playlists.wide = tracks %>%
    group_by(source, playlist_id) %>%
    filter(n() >= 5) %>%
    summarise(
        aggressive = sum(aggressive_relaxed == "aggressive") / n(),
        relaxed = sum(aggressive_relaxed == "relaxed") / n(),
        sad = sum(happy_sad == "sad") / n(),
        happy = sum(happy_sad == "happy") / n()
    ) %>%
    # summarise() peels off only the innermost grouping variable, so without
    # this the stored table would remain grouped by `source`.
    ungroup()

# Long version of the playlist profile: every column except the two id
# columns is stacked, with the column name in `mood` and its value in `prop`.
playlists = gather(
    playlists.wide,
    key = "mood",
    value = "prop",
    -playlist_id,
    -source
)

library(GGally)

## 
## Attaching package: 'GGally'

## The following object is masked from 'package:dplyr':
## 
##     nasa

# Scatterplot matrix of the per-playlist mood proportions.
playlists.wide %>%
    # Guard against `source` still being a grouping variable: select() will
    # not drop a column the table is grouped by.
    ungroup() %>%
    select(-playlist_id, -source) %>%
    # Small, semi-transparent points keep the dense lower panels readable.
    ggpairs(lower = list(
        continuous = wrap("points", alpha = 0.3, size = 0.1),
        combo = wrap("dot", alpha = 0.4, size = 0.2)
    ))

Parallel coordinates. Each playlist is a line.

# One line per playlist across the moods. The very low alpha lets overlapping
# lines accumulate, so frequent profiles appear darker.
ggplot(playlists, aes(x = mood, y = prop, group = playlist_id)) +
    geom_line(alpha = .01, size = .5, colour = "darkorange")

Densities

# playlists %>% 
#     ggplot(aes(x = mood, y = prop)) + 
#     geom_violin()

# Density of the per-playlist proportion, drawn in a separate panel row for
# each mood.
ggplot(playlists, aes(x = prop, fill = mood)) +
    geom_density(adjust = 1) +
    facet_grid(mood ~ .)

Zoom in on one playlist

# Tile plot of a single playlist: position in the playlist on x, one row per
# feature on y, tile colour = that feature's label.
tracks.long %>%
    # playlist_id was read as character, so compare against a string instead
    # of relying on implicit number-to-string coercion.
    # (Another id that was tried here: 10229.)
    filter(playlist_id == "10338") %>%
    group_by(mood) %>%
    # Number rows by their order within each feature. seq_len() stays correct
    # for an empty group, where 1:n() would yield c(1, 0).
    mutate(track = seq_len(n())) %>%
    ggplot(aes(x = track, y = mood, fill = value)) +
    geom_tile() +
    scale_fill_brewer(type = "qual")

Transitions

Proportion of transitions that involve a change, per playlist:

# For each playlist, the fraction of transitions whose source and target
# labels differ, computed separately for mood, sound and genre.
change_proportion = trace %>%
    mutate(
        mood_change = mood_source != mood_target,
        sound_change = sound_source != sound_target,
        genre_change = genre_source != genre_target
    ) %>%
    group_by(playlist_id) %>%
    # The mean of a logical vector is the share of TRUE values.
    summarise(
        mood_change = mean(mood_change),
        sound_change = mean(sound_change),
        genre_change = mean(genre_change)
    )

# Scatterplot matrix of the three per-playlist change rates (id column dropped).
# A long-format reshape was left disabled in the original pipeline:
#   gather(key = "change", value = "proportion", -playlist_id)
ggpairs(
    select(change_proportion, -playlist_id),
    lower = list(
        continuous = wrap("points", alpha = 0.3, size = 0.2),
        combo = wrap("dot", alpha = 0.4, size = 0.2)
    )
)

What transitions happen most often?

# The 30 most frequent "source>target" transition labels, plotted as a
# horizontal dot chart ordered by count.
trace %>%
    transmute(
        playlist_id,
        mood_change = paste(mood_source, mood_target, sep = ">"),
        sound_change = paste(sound_source, sound_target, sep = ">"),
        genre_change = paste(genre_source, genre_target, sep = ">")
    ) %>%
    gather(key = "type", value = "change", -playlist_id) %>%
    # NOTE(review): counting by `change` alone ignores `type`, so an identical
    # label occurring under two feature types would be pooled into one row.
    # Confirm the mood/sound/genre label sets are disjoint.
    count(change) %>%
    arrange(desc(n)) %>%
    head(30) %>%
    ggplot(aes(x = reorder(change, n), y = n)) +
    geom_point() +
    # scale_y_log10() +
    coord_flip()