library(readxl)
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ readr   2.1.1
## ✓ tibble  3.1.6     ✓ purrr   0.3.4
## ✓ tidyr   1.1.4     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
chart_songs <- read_excel('chart_songs.xlsx')
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
Introduction and Background

Out of curiosity, I wanted to examine if the number of songs featuring another artist had increased as time went on. Taking a dataset of Billboard top 100 songs from the last twenty years, I then cleaned the data and filtered for songs that included two or more separate artists (e.g. a band would be considered 1 unit)

chart_songs$featured_list <- ifelse(grepl("&", chart_songs$artist) ==TRUE, 1,0)
#View(chart_songs)
ab <- str_count(chart_songs$artist, ",")
max(ab)
## [1] 3

From this data we see that collaborations has increased over time. In the year 2000 about 10% of songs contained a collaboration in around 2020 close to half of songs do.

chart_songs$ID <- seq.int(nrow(chart_songs))
View(chart_songs)
t1 <- chart_songs %>%
  tabyl(featured_list, year)
t1
##  featured_list 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
##              0   89   78   77   67   70   73   70   74   75   79   66   63   70
##              1   11   22   23   33   30   27   30   26   25   21   34   37   30
##  2013 2014 2015 2016 2017 2018 2019 2020 2021
##    60   68   61   60   52   49   58   57   58
##    40   32   39   40   48   51   42   43   42
features <- unlist(t1[2,])
features <- features[-1]
year_g <- c(2000:2021)
plot(year_g, features/100)

barplot(features/100)

Data only featuring collaborations is marked and then subsetted. Following this step, we create an two mode network where each artist is connected to a song.

features <- subset(chart_songs, featured_list ==1)

df_feat <- features %>% separate(artist, c("x1", "x2", "x3", "x4", "x5"), sep = "([,&])")
## Warning: Expected 5 pieces. Missing pieces filled with `NA` in 722 rows [1, 2,
## 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ...].
artists <- df_feat$x1
songs <- df_feat$song
position <- df_feat$position
year <-df_feat$year

my_range <- c(4,5,6,7)
my_col <- c("x2", "x3", "x4", "x5")
rows <-nrow(df_feat)

artists_featured <- c()
songs_featured <- c()
position_featured <- c()
year_featured <- c()


my_subset <- subset(df_feat,is.na(x2)==FALSE) 
new_art_2 <- my_subset[,"x2"]
new_song_2 <- my_subset[,"song"]
new_position_2 <- my_subset[, "position"]
new_year_2 <- my_subset[ , "year"]



my_subset <- subset(df_feat,is.na(x3)==FALSE) 
new_art_3 <- my_subset[,"x3"]
new_song_3 <- my_subset[,"song"]
new_position_3 <- my_subset[, "position"]
new_year_3 <- my_subset[ , "year"]



my_subset <- subset(df_feat,is.na(x4)==FALSE) 
new_art_4 <- my_subset[,"x4"]
new_song_4 <- my_subset[,"song"]
new_position_4 <- my_subset[, "position"]
new_year_4 <- my_subset[ , "year"]



my_subset <- subset(df_feat,is.na(x5)==FALSE) 
new_art_5 <- my_subset[,"x5"]
new_song_5 <- my_subset[,"song"]
new_position_5 <- my_subset[, "position"]
new_year_5 <- my_subset[ , "year"]

second <- cbind(new_art_2, new_song_2, new_position_2, new_year_2)
colnames(second) <- c("artist","song","position", "year")
third <- cbind(new_art_3, new_song_3, new_position_3, new_year_3)
colnames(third) <- c("artist","song","position", "year")
fourth <- cbind(new_art_4, new_song_4, new_position_4, new_year_4)
colnames(fourth) <- c("artist","song","position", "year")
fifth <- cbind(new_art_5, new_song_5, new_position_5, new_year_5)
colnames(fifth) <- c("artist","song","position", "year")

feature_list <- rbind(second,third,fourth,fifth)
orig_list <- cbind(artists, songs, position, year)
colnames(orig_list) <- c("artist", "song", "position", "year")

collab_list <- rbind(orig_list,feature_list)
collab_list$artist <- collab_list$artist %>%str_trim()
write_csv(collab_list, file = "~/Library/CloudStorage/Box-Box/Taurean/Year_1/SSNA/collab_list.csv")
library(igraph)
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union

Part 1 The elements in this two mode network graph:

Vertices: Artist (musicians) & Songs that made the top 100 from 2000 to 2021 Edges: Artists featured on the same song that made the top 100. Basically its collaborarions of artists. For simplicity of graph’s I’m only using 2020 data as a subset Artists are represented by red circles songs are yellow circles

small_set <- subset(collab_list, year == 2020)
my_data <- cbind(small_set$artist,small_set$song)
g <- graph.data.frame(my_data, directed = TRUE)
V(g)$type <- bipartite_mapping(g)$type
V(g)$color <- ifelse(V(g)$type == "TRUE", "gold", "tomato")
plot(g, vertex.size = 3, vertex.label.cex = .35, edge.arrow.size =.25)

my_data <- as.matrix(my_data)
row.names(my_data)<-my_data[,1]
G <- graph.data.frame(my_data, directed = FALSE)
my_edgelist <-get.edgelist(G)
#my_edgelist
A <- get.adjacency(G, sparse = FALSE)
#View(A)
#my_edgelist
#A <- as_adjacency_matrix(G,type="both",names=TRUE,sparse=FALSE)

#plot(a, vertex.size = 10, vertex.label.cex = .25)
#View(A)
# My adjacency matrix lists songs to artist. When it's made the first half of the matrix repeats artists, so I cut that to get the actual matrix of song to artist
B <- A[69:109,]

Part 2

In part two we transform the 2 mode network of artists collaborating on songs into a one mode network which represents artist who have collaborated on a song together. The diagonal in this model represents the total songs the artist was featured on.

#### We do the transformation to get the artist to artist (people to people) network
onemode_artists <- t(B)%*%B
head(onemode_artists,3)
##           Tones Future Jawsh 685 Justin Bieber DaBaby Cardi B Jack Harlow Topic
## Tones         1      0         0             0      0       0           0     0
## Future        0      1         0             0      0       0           0     0
## Jawsh 685     0      0         1             0      0       0           0     0
##           Chris Brown Pop Smoke Drake Lady Gaga Doja Cat Dababy 24kGoldn
## Tones               0         0     0         0        0      0        0
## Future              0         0     1         0        0      0        0
## Jawsh 685           0         0     0         0        0      0        0
##           Camila Cabello Roddy Ricch Eminem Mustard Benee Juice WRLD Dan
## Tones                  0           0      0       0     0          0   0
## Future                 0           0      0       0     0          0   0
## Jawsh 685              0           0      0       0     0          0   0
##           Gabby Barrett Karol G Powfu Megan Thee Stallion DJ Khaled
## Tones                 0       0     0                   0         0
## Future                0       0     0                   0         0
## Jawsh 685             0       0     0                   0         0
##           Shawn Mendes Ariana Grande JP Saxe Blake Shelton Duet
## Tones                0             0       0                  0
## Future               0             0       0                  0
## Jawsh 685            0             0       0                  0
##           The Black Eyed Peas Internet Money Kane Brown surf mesa Carly Pearce
## Tones                       0              0          0         0            0
## Future                      0              0          0         0            0
## Jawsh 685                   0              0          0         0            0
##           Lil Baby I Jason Derulo Quavo A7s Young Thug Lil Durk Nicki Minaj
## Tones            0 1            0     0   0          0        0           0
## Future           0 0            0     0   0          0        0           0
## Jawsh 685        0 0            1     0   0          0        0           0
##           iann dior Gus Dapperton Marshmello Shay Charlie Puth Lil Tjay
## Tones             0             0          0    0            0        0
## Future            0             0          0    0            0        0
## Jawsh 685         0             0          0    0            0        0
##           Chance The Rapper beabadoobee Beyonce 50 Cent Julia Michaels
## Tones                     0           0       0       0              0
## Future                    0           0       0       0              0
## Jawsh 685                 0           0       0       0              0
##           Gwen Stefani Beabadoobee J Balvin Gunna Swae Lee Emilee Lee Brice
## Tones                0           0        0     0        0      0         0
## Future               0           0        0     0        0      0         0
## Jawsh 685            0           0        0     0        0      0         0
##           42 Dugg Tory Lanez Don Toliver Khalid Lil Wayne NAV Dance Monkey
## Tones           0          0           0      0         0   0            0
## Future          0          0           0      0         0   0            0
## Jawsh 685       0          0           0      0         0   0            0
##           Life Is Good Savage Love (Laxed - Siren Beat) Intentions Rockstar WAP
## Tones                0                                0          0        0   0
## Future               0                                0          0        0   0
## Jawsh 685            0                                0          0        0   0
##           WHATS POPPIN Breaking Me Go Crazy For The Night Laugh Now Cry Later
## Tones                0           0        0             0                   0
## Future               0           0        0             0                   0
## Jawsh 685            0           0        0             0                   0
##           Rain On Me Say So Mood My Oh My High Fashion Godzilla Ballin'
## Tones              0      0    0        0            0        0       0
## Future             0      0    0        0            0        0       0
## Jawsh 685          0      0    0        0            0        0       0
##           Supalonely Come & Go 10,000 Hours I Hope Mood Swings Holy Tusa
## Tones              0         0            0      0           0    0    0
## Future             0         0            0      0           0    0    0
## Jawsh 685          0         0            0      0           0    0    0
##           Death Bed Savage Popstar Senorita Stuck With U The Woo
## Tones             0      0       0        0            0       0
## Future            0      0       0        0            0       0
## Jawsh 685         0      0       0        0            0       0
##           If The World Was Ending Nobody But You RITMO (Bad Boys For Life)
## Tones                           0              0                         0
## Future                          0              0                         0
## Jawsh 685                       0              0                         0
##           Lemonade No Guidance Be Like That ily I Hope You're Happy Now We Paid
## Tones            0           0            0   0                       0       0
## Future           0           0            0   0                       0       0
## Jawsh 685        0           0            0   0                       0       0
##           Greece
## Tones          0
## Future         0
## Jawsh 685      0
artist_dia <- diag(onemode_artists)
#diag(onemode_artists) <- 0 

This is the sociogram representing artist to artist networks.

C <- graph_from_adjacency_matrix(onemode_artists, weighted = TRUE)
C <- simplify(C)
#plot(C, vertex.size = 4, vertex.label.cex =.75, edge.arrow.size =.5, vertex.label.dist = 1)
C.a <- get.edgelist(C)
C.df <- graph.data.frame(C.a, directed = FALSE)
C.df <- simplify(C.df)
plot(C.df, vertex.size = 4, vertex.label.cex =.75, edge.arrow.size =.5, vertex.label.dist = 1)

In part 3 we transform the two mode network into a one mode network. The diagonal here represents the total number of songs that contain at least one of the same artists. It’s the number of connections of songs to another song.

#### We then do the transformation to get the song to song relationship
onemode_songs <- B%*%t(B)
songs_dia <- diag(onemode_songs)
#diag(onemode_songs) <- 0 
C.2 <- graph_from_adjacency_matrix(onemode_songs, weighted = TRUE)
C.2 <- simplify(C.2)
#plot(C, vertex.size = 4, vertex.label.cex =.75, edge.arrow.size =.5, vertex.label.dist = 1)
C2.a <- get.edgelist(C.2)
C2.df <- graph.data.frame(C2.a, directed = FALSE)
C2.df <- simplify(C2.df)
plot(C2.df, vertex.size = 4, vertex.label.cex =.75, edge.arrow.size =.5, vertex.label.dist = 1)

The sociogram in part 2 is the communities of artists that collaborated on a top 100 song in 2020. The data also potentially could showcase weight, the number of times artist collaborated to see the strength of their relationships, however, I did not include that. The graph was made by showing two artist had relationship to each other by appearing on the same song. We see certain artist frequently collaborate and make small clusters of connections.

The sociogram in part 3 is the songs that are connected to each other because they have similar artist. It’s kind of like my spotify discover weekly, where you are shown songs you may enjoy, because similar artists are featured on them. Here you see

The undirected option was necessary because if you did the adjacency transformations or any transformations, the connection between the “institution” and the actor would be lost. As the actor was connected to the institution, that could be graphed, however while directed however, if you were to map the institution connection like we did in part 2 and 3, the data would result in zeros. I made this mistake for several hours actually and it was frustrating.