Deliverable 6_Network_Vis_1_TB

########################Complex Systems Networks##########################
library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(readr)
#a<-read.csv("~/Box/Taurean/Year_1/SSNA/Visualizing_course_taking.csv")
b<-sns_music_collab <- read.csv("sns_music_collab.csv")
#b <- subset(b, year == 2020)
b<- b[,-1]

dim(b)

## [1] 1586    4

names(b)

## [1] "artist"   "song"     "position" "year"

#Creates a campus indicator by semester
#a$campus<-ifelse(a$term==min(a$term), as.character(a$sem1_campus), as.character(a$sem2_campus))
#head(a[a$sem1_campus!=a$sem2_campus,c("term", "sem1_campus", "sem2_campus", "campus")], 200)

#The following code retrieves the student-course connections to be saved under an graph object called "g"
#g<-graph.data.frame(a[,c("pseudoid","term_course", "gr_course", "campus")])
b$song <-paste(b$song,b$year,sep="_")
g<-graph.data.frame(b[,c("artist","song", "position", "year")])

#The following code adds the two-mode structure to the graph "g"
V(g)$type <- V(g)$name %in% b[,c("artist")] 
table(V(g)$type)[2]

## TRUE 
##  631

i<-table(V(g)$type)[2]

#Gets centrality measures
cent<-data.frame(bet=betweenness(g, normalized=T, directed = FALSE)/max(betweenness(g, normalized=T, directed = FALSE)),eig=evcent(g)$vector, degree=degree(g, mode="total")) 
cent$name<-rownames(cent) #Ids in this case
head(cent);tail(cent)

##                                      bet          eig degree
## Santana                     0.0005688123 6.017481e-17      5
## Rob Thomas                  0.0000000000 7.429830e-18      1
## Sting                       0.0000000000 0.000000e+00      1
## Missy 'Misdemeanor' Elliott 0.0438896850 8.285497e-04      3
## Dr Dre                      0.0632788002 7.876422e-03      4
## SoulDecision                0.0000000000 0.000000e+00      1
##                                                    name
## Santana                                         Santana
## Rob Thomas                                   Rob Thomas
## Sting                                             Sting
## Missy 'Misdemeanor' Elliott Missy 'Misdemeanor' Elliott
## Dr Dre                                           Dr Dre
## SoulDecision                               SoulDecision

##                                   bet          eig degree
## Laugh Now Cry Later_2021 3.230726e-02 1.836624e-01      2
## Glad You Exist_2021      2.556460e-06 4.129379e-03      2
## Lemonade_2021            1.944188e-02 1.287694e-03      4
## Monster_2021             7.981816e-02 2.535812e-02      2
## Bed_2021                 6.979238e-02 2.578108e-02      3
## All We Got_2021          1.150407e-04 1.391239e-17      2
##                                              name
## Laugh Now Cry Later_2021 Laugh Now Cry Later_2021
## Glad You Exist_2021           Glad You Exist_2021
## Lemonade_2021                       Lemonade_2021
## Monster_2021                         Monster_2021
## Bed_2021                                 Bed_2021
## All We Got_2021                   All We Got_2021

V(g)$label<-V(g)$name
V(g)$name<-1:length(V(g))

#In HTML visualizations (like NetworkD3) we need two datasets, one for links, another for nodes attributes.

#Gets edgelist from graph, also any other attribute at the edge level to be included in the mapping
links<-as.data.frame(cbind(get.edgelist(g), E(g)$position, E(g)$year))

#Needs to be numeric
links$V1<-as.numeric(as.character(links$V1))

links$V2<-as.numeric(as.character(links$V2))
str(links)

## 'data.frame':    1586 obs. of  4 variables:
##  $ V1: num  1 2 3 4 5 5 6 7 8 9 ...
##  $ V2: num  632 633 634 635 636 637 638 639 640 641 ...
##  $ V3: chr  "2" "6" "31" "56" ...
##  $ V4: chr  "2000" "2000" "2000" "2000" ...

links$V3<-round(as.numeric(as.character(links$V3)),3)
head(links)

##   V1  V2 V3   V4
## 1  1 632  2 2000
## 2  2 633  6 2000
## 3  3 634 31 2000
## 4  4 635 56 2000
## 5  5 636 63 2000
## 6  5 637 64 2000

colnames(links)<-c("source","target", "value", "year")

#Counts begin at zero in computer programming
links[,1:2]<-(links[,1:2]-1)

dim(links)

## [1] 1586    4

b1<-aggregate(position~artist, data=b, FUN=mean)

#Adding attributes at the actor level
V(g)$size<- cent$bet[match(V(g)$label, cent$name)]
V(g)$group <- round(b1$position[match(V(g)$label, b1$artist)],3)

###############################################################################
 #If you have codes with no attributes at the code level, this may be needed##
###############################################################################

# V(g)$firstgen <- ifelse(is.na(V(g)$firstgen), .5, V(g)$firstgen)
#V(g)$group <- ifelse(is.na(V(g)$group), 1, V(g)$group)

summary(V(g)$size)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.01299 0.03655 0.04068 1.00000

summary(V(g)$group)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00   35.55   52.67   52.93   71.25  100.00     721

nodes <- data.frame(name=c(paste("Artist: ", V(g)$label[1:i], " Position: ", V(g)$group[1:i]),V(g)$label[(i+1):length(V(g)$label)]), size=abs(V(g)$size), position=V(g)$group) 
nodes$name<-as.character(nodes$name)

#Name of course, rather than code
#nodes$name[(i+1):length(V(g)$label)] <- a$crse_ld[match(nodes$name[(i+1):length(V(g)$label)], a$term_course)]
#head(nodes);tail(nodes)

nodes$group<-NA
nodes$group[1:i]<-cut(nodes$position[1:i], c(1,35,52,71,max(nodes$position[1:i])), right=TRUE)
table(is.na(nodes$group))

## 
## FALSE  TRUE 
##   626   726

table(nodes$group)

## 
##   1   2   3   4 
## 151 156 161 158

#head(nodes[is.na(nodes$group),],20)
nodes$group<-ifelse(is.na(nodes$group), "Song", ifelse(nodes$group==1, "Low Quartile", ifelse(nodes$group==2, "Second quartile", ifelse(nodes$group==3, "Third quartile", "Top quartile"))))
counts<-data.frame(table(nodes$group))

counts$labels <- paste(counts$Var1, ", N= ", counts$Freq, sep="")
nodes$groups <- counts$labels[match(nodes$group, counts$Var1)] 
head(counts)

##              Var1 Freq                  labels
## 1    Low Quartile  151    Low Quartile, N= 151
## 2 Second quartile  156 Second quartile, N= 156
## 3            Song  726            Song, N= 726
## 4  Third quartile  161  Third quartile, N= 161
## 5    Top quartile  158    Top quartile, N= 158

library(networkD3)
library(magrittr)
library(htmlwidgets)

## 
## Attaching package: 'htmlwidgets'

## The following object is masked from 'package:networkD3':
## 
##     JS

library(htmltools)
netviz<-forceNetwork(Links = links, Nodes = nodes,
                  Source = 'source', Target = 'target',
                  NodeID = 'name',
                  Group = "groups", # color nodes by group calculated earlier
                  charge = -30, # node repulsion
                  linkDistance = JS("function(d) { return d.linkDistance; }"),#JS("function(d){return d.value}"),
                  linkWidth = JS("function(d) { return Math.sqrt(d.value)*2; }"),
                  opacity = 0.8,
                  Value = "value",
                  Nodesize = 'size', 
                  radiusCalculation = JS("Math.sqrt(d.nodesize*30)+4"),
                  zoom = T, 
                  fontSize=14,
                  bounded= F,
                  legend= TRUE)
#                 linkColour = JS("d3.scaleOrdinal(d3.schemeCategory10)"))

HTMLaddons <- 
"function(el, x) { 
d3.select('body').style('background-color', ' #212f3d ')
d3.selectAll('.legend text').style('fill', 'white') 
 d3.selectAll('.link').append('svg:title')
      .text(function(d) { return 'Position: ' + d.value + ', Year: ' + d.year ; })
  var options = x.options;
  var svg = d3.select(el).select('svg')
  var node = svg.selectAll('.node');
  var link = svg.selectAll('link');
  var mouseout = d3.selectAll('.node').on('mouseout');
  function nodeSize(d) {
    if (options.nodesize) {
      return eval(options.radiusCalculation);
    } else {
      return 6;
    }
  }

  
d3.selectAll('.node').on('click', onclick)

  function onclick(d) {
    if (d3.select(this).on('mouseout') == mouseout) {
      d3.select(this).on('mouseout', mouseout_clicked);
    } else {
      d3.select(this).on('mouseout', mouseout);
    }
  }

  function mouseout_clicked(d) {
    node.style('opacity', +options.opacity);
    link.style('opacity', +options.opacity);

    d3.select(this).select('circle').transition()
      .duration(750)
      .attr('r', function(d){return nodeSize(d);});
    d3.select(this).select('text').transition()
    
      .duration(1250)
      .attr('x', 0)
      .style('font', options.fontSize + 'px ');
  }

}
"
netviz$x$links$linkDistance <- (1/links$value)*500
netviz$x$links$year <- links$year
onRender(netviz, HTMLaddons)

From our key actor analysis we see that the artist with the highest eigenvector centrality is Drake. The artist with the highest betweenness centrality is David Guetta.

topeig <- cent[order(-cent$eig),]
topbet <- cent[order(-cent$bet),]
head(topeig)

##                            bet       eig degree                 name
## Drake                0.7997468 1.0000000     29                Drake
## Rihanna              0.9538800 0.5030951     23              Rihanna
## Too Good_2016        0.0845163 0.2586505      2        Too Good_2016
## Take Care_2012       0.0845163 0.2586505      2       Take Care_2012
## What's My Name?_2011 0.0845163 0.2586505      2 What's My Name?_2011
## Work_2016            0.0845163 0.2586505      2            Work_2016

head(topbet)

##                     bet        eig degree          name
## David Guetta  1.0000000 0.14066500     25  David Guetta
## Rihanna       0.9538800 0.50309506     23       Rihanna
## Justin Bieber 0.9066926 0.14260961     22 Justin Bieber
## Drake         0.7997468 1.00000000     29         Drake
## Ludacris      0.7911125 0.02348136     18      Ludacris
## Nicki Minaj   0.7879056 0.05639110     19   Nicki Minaj

Deliverable 6_Network_Vis_1_TB_MarkDown

Taurean Butler

12/26/2021