These are the main functions used to create and analyze our network.

First, install and load the following packages:

library(igraph)
## Warning: package 'igraph' was built under R version 3.6.3
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
library(sna)
## Warning: package 'sna' was built under R version 3.6.3
## Loading required package: statnet.common
## Warning: package 'statnet.common' was built under R version 3.6.3
## 
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
## 
##     order
## Loading required package: network
## Warning: package 'network' was built under R version 3.6.3
## network: Classes for Relational Data
## Version 1.16.0 created on 2019-11-30.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##                     Mark S. Handcock, University of California -- Los Angeles
##                     David R. Hunter, Penn State University
##                     Martina Morris, University of Washington
##                     Skye Bender-deMoll, University of Washington
##  For citation information, type citation("network").
##  Type help("network-package") to get started.
## 
## Attaching package: 'network'
## The following objects are masked from 'package:igraph':
## 
##     %c%, %s%, add.edges, add.vertices, delete.edges,
##     delete.vertices, get.edge.attribute, get.edges,
##     get.vertex.attribute, is.bipartite, is.directed,
##     list.edge.attributes, list.vertex.attributes,
##     set.edge.attribute, set.vertex.attribute
## sna: Tools for Social Network Analysis
## Version 2.5 created on 2019-12-09.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
##  For citation information, type citation("sna").
##  Type help(package="sna") to get started.
## 
## Attaching package: 'sna'
## The following objects are masked from 'package:igraph':
## 
##     betweenness, bonpow, closeness, components, degree,
##     dyad.census, evcent, hierarchy, is.connected, neighborhood,
##     triad.census
library(intergraph)
## Warning: package 'intergraph' was built under R version 3.6.3
library(RColorBrewer)
library(randomcoloR)
## Warning: package 'randomcoloR' was built under R version 3.6.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(caret) 
## Warning: package 'caret' was built under R version 3.6.3
## Loading required package: lattice
library(ggplot2) 
library(reshape2) 

Load Functions

The following function builds the graph with all of the node- and edge-level attributes that are found in, or derived from, the data provided.

# Build an undirected igraph graph from a node table and an edge table.
#
# nodes: data frame with one row per vertex; `Id` becomes the vertex name and
#        the remaining columns read below become vertex attributes.
# edges: data frame with one row per edge; `Source`/`Target` hold vertex names
#        and the remaining columns read below become edge attributes.
# Returns the igraph graph. Every attribute except the edge weight is stored
# as character.
build_graph <- function(nodes, edges) {
  net <- graph.empty(nrow(nodes), directed = FALSE)

  # Convert R factors to character before storing them on the vertices.
  V(net)$name <- as.character(nodes$Id)
  V(net)$ACTIVITY <- as.character(nodes$ACTIVITY) # used for Q2
  V(net)$pi_key <- as.character(nodes$pi_key)
  V(net)$discipline_classification <-
    as.character(nodes$discipline_classification) # used for Q4

  # add_edges() takes a flat vector: source1, target1, source2, target2, ...
  endpoints <- as.vector(rbind(
    as.character(edges$Source),
    as.character(edges$Target)
  ))
  net <- add_edges(net, endpoints)

  E(net)$Label <- as.character(edges$Label)
  E(net)$PUB_YEAR <- as.character(edges$PUB_YEAR)
  E(net)$COUNTRY <- as.character(edges$COUNTRY)
  E(net)$ACTIVITY <- as.character(edges$ACTIVITY)
  E(net)$PMID <- as.character(edges$PMID_x)
  # Weighted degree for Q3 uses the activity weight. For Q4 switch to:
  # E(net)$weight <- edges$normalized_discipline_weight
  E(net)$weight <- edges$normalized_activity_weight
  E(net)$discipline <- as.character(edges$discipline_classification)

  V(net)$Group <- as.character(nodes$Group) # used for Q1

  V(net)$Unique_activities <- as.character(nodes$unique_activities)
  V(net)$Project_count <- as.character(nodes$project_count)
  V(net)$Act_per_proj_quant <- as.character(nodes$Act_per_proj_quant) # used for Q3
  V(net)$Activity_per_project <- as.character(nodes$activity_per_project)
  V(net)$discipline.group <- as.character(nodes$discipline.group)
  V(net)$unique_activities <- as.character(nodes$unique_activities)
  V(net)$unique_activity_grouped <- as.character(nodes$unique_activity_grouped)

  # Optional degree-based node filtering is currently disabled:
  # net <- filter_node_by_number_publication(net, num_pub_floor)
  net
}

The next functions are essential for calculating the network level statistics.

This function calculates assortativity, which is a single statistic measuring the degree of connection between similar nodes.

# Nominal assortativity of graph `g` with respect to a vertex attribute.
#
# g:    igraph graph.
# attr: name (string) of the vertex attribute holding the group labels.
# Returns the value produced by assortativity_nominal(), treating the graph
# as undirected.
get_assortativity <- function(g, attr) {
  groups <- igraph::vertex_attr(g, attr)
  if (is.null(groups)) {
    stop("Vertex attribute '", attr, "' not found in graph", call. = FALSE)
  }
  # The vertex values are character; they have to be converted to a factor
  # before being passed as the `types` argument.
  assortativity_nominal(g, as.factor(groups), directed = FALSE)
}

The next function annotates every node with its degree (number of connections) and its strength (weighted degree). These per-node values are later averaged for each level of the specified attribute in `analyze_graph`.

# Annotate every vertex of `g` with its degree and strength.
#
# g:    igraph graph.
# attr: unused; kept so existing callers that pass it keep working.
# Returns the graph with two new vertex attributes, `degree` and `strength`.
# The igraph:: prefix is required because sna masks `degree`.
get_degree <- function(g, attr) {
  V(g)$degree <- igraph::degree(g)
  V(g)$strength <- igraph::strength(g)
  g
}

The next function calculates a confusion matrix which generates percentages associated with frequencies of collaboration across levels of a particular attribute.

# Calculate a confusion table of edges between groups.
#
# nodes: node table with an `Id` column and a column named by `attr`.
# edges: edge table with `Source` and `Target` columns holding node ids.
# attr:  name (string) of the node column that defines the groups.
# Returns an upper-triangular proportion table (dimensions `Source_Groups` x
# `Target_Groups`); cell (a, b) is the share of edges joining group a and
# group b in either direction. The lower triangle is blanked out.
get_group_confusion <- function(nodes, edges, attr) {
  from_to <- select(edges, Source, Target)
  from_to$Source <- as.character(from_to$Source)
  from_to$Target <- as.character(from_to$Target)
  nodes$Id <- as.character(nodes$Id)

  # Joining the node table twice gives every node column a ".x" copy (source
  # endpoint) and a ".y" copy (target endpoint).
  from_to <- left_join(from_to, nodes, by = c("Source" = "Id"))
  from_to <- left_join(from_to, nodes, by = c("Target" = "Id"))
  x <- paste0(attr, ".x")
  y <- paste0(attr, ".y")
  if (!all(c(x, y) %in% colnames(from_to))) {
    stop("Column '", attr, "' not found in `nodes`", call. = FALSE)
  }
  src_vals <- from_to[[x]]
  tgt_vals <- from_to[[y]]

  # Both factors must share one level set, otherwise the table is not square
  # and cannot be added to its transpose below.
  if (is.factor(src_vals) || is.factor(tgt_vals)) {
    lvls <- base::union(
      levels(as.factor(src_vals)),
      levels(as.factor(tgt_vals))
    )
  } else {
    lvls <- sort(unique(c(src_vals, tgt_vals)))
  }
  Source_Groups <- factor(src_vals, levels = lvls)
  Target_Groups <- factor(tgt_vals, levels = lvls)

  tbl <- table(Source_Groups, Target_Groups)
  prop_table <- prop.table(tbl)

  # The graph is undirected, so fold (a, b) and (b, a) together and keep only
  # the upper triangle.
  folded <- prop_table + t(prop_table)
  folded <- lower.tri.remove(folded)
  # Adding the transpose counted the diagonal twice.
  diag(folded) <- diag(folded) / 2
  folded
}

The next function calculates the transitivity, which measures how tightly knit a community is.

# Attach the local transitivity (clustering coefficient) to every vertex.
#
# g: igraph graph whose vertices are looked up by their `name` attribute.
# Returns the graph with a new vertex attribute `cc`.
get_transitivity <- function(g) {
  V(g)$cc <- transitivity(g, type = "local", vids = V(g)$name)
  g
}

The following function counts the number of cliques with n connected nodes in the network. For our analysis, we used this function to count complete triads.

# Count group membership inside every clique of exactly `n` vertices.
#
# g:    igraph graph.
# n:    clique size to look for (3 for complete triads).
# attr: name (string) of the vertex attribute holding the group labels.
# Returns a numeric matrix with one row per clique of size n and one column
# per distinct attribute value; cell (i, j) is the number of members of
# clique i that belong to group j. The matrix has zero rows when the graph
# contains no clique of that size. Row order is not significant.
get_cliques_membership <- function(g, n, attr) {
  # Bounding the search with max = n avoids enumerating all larger cliques
  # only to throw them away afterwards.
  cliques_n <- cliques(g, min = n, max = n)
  groups <- unique(igraph::vertex_attr(g, attr))

  m <- matrix(0, nrow = length(cliques_n), ncol = length(groups))
  if (length(groups) > 0) {
    colnames(m) <- groups
  }

  # seq_along() makes both loops no-ops on empty input (1:0 would not).
  for (i in seq_along(cliques_n)) {
    members <- vertex_attr(g, attr, cliques_n[[i]])
    for (j in seq_along(groups)) {
      m[i, j] <- sum(members == groups[[j]])
    }
  }
  m
}

This function takes the confusion matrix and rescales its percentages so that each one is a proportion of the collaborations of a particular level of an attribute, rather than a proportion of the overall distribution.

# Rescale a confusion table so that frequencies are relative to each group.
#
# prop_table: upper-triangular proportion table with dimensions named
#             `Source_Groups` and `Target_Groups`.
# Returns a long-format data frame with columns Source_Groups, Target_Groups,
# Freq and percent_within, where percent_within is Freq divided by the total
# Freq of the row's Source_Groups level.
get_confusion_within_group <- function(prop_table) {
  conf_mtx <- prop_table

  # Mirror the upper triangle into the lower one so every group has a full
  # row of collaboration frequencies.
  lower <- lower.tri(conf_mtx)
  conf_mtx[lower] <- t(conf_mtx)[lower]

  # data.frame() on a table yields one row per cell (long format).
  conf_mtx <- data.frame(conf_mtx)
  conf_mtx$Source_Groups <- as.character(conf_mtx$Source_Groups)
  conf_mtx$Target_Groups <- as.character(conf_mtx$Target_Groups)

  if (nrow(conf_mtx) == 0) {
    conf_mtx[, "percent_within"] <- numeric(0)
    return(conf_mtx)
  }

  # Total frequency of each source group, repeated on every row of the group.
  group_total <- ave(conf_mtx$Freq, conf_mtx$Source_Groups, FUN = sum)
  conf_mtx[, "percent_within"] <- as.numeric(conf_mtx$Freq) / group_total
  conf_mtx
}

The next function takes the output of the previous function to calculate the within and outside group collaboration for levels of a particular attribute.

# Summarise within-group vs. outside-group collaboration for every group.
#
# conf_mtx: data frame with columns Source_Groups, Target_Groups and
#           percent_within (the output of get_confusion_within_group()).
# Returns a summary with one row per (Source_Groups, match) pair, where
# `match` is "within group" or "outside group" and `in_out_pct` is the sum of
# percent_within over the matching rows. Missing percent_within counts as 0.
get_in_out_tbl <- function(conf_mtx) {
  conf_mtx %>%
    dplyr::mutate(
      match = ifelse(
        Source_Groups == Target_Groups, "within group", "outside group"
      ),
      percent_within = ifelse(is.na(percent_within), 0, percent_within)
    ) %>%
    dplyr::group_by(Source_Groups, match) %>%
    dplyr::summarise(in_out_pct = sum(percent_within))
}

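The following functions use the utility functions listed above to compute network statistics overall and by year. Each yearly network is cumulative: it contains every edge published up to and including that year.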

# Compute network statistics for one graph, broken down by a vertex attribute.
#
# g:     igraph graph whose vertices carry the attribute named by `attr`.
# nodes: node table, passed on to get_group_confusion().
# edges: edge table, passed on to get_group_confusion().
# attr:  name (string) of the vertex attribute / node column to group by
#        (e.g. Group, ACTIVITY, a discipline column).
# Returns a list with the elements assortativity, degree, strength,
# transitivity, degree_dist, prop_table, in_out_table, density and
# triads_counts. The per-level elements are lists keyed by attribute level;
# degree, strength and degree_dist also hold an 'all_nodes' entry.
# Side effect: prints `attr` and a separator line.
analyze_graph <- function(g, nodes, edges, attr) {
  stats <- list()
  print(attr)

  # 1. Assortativity
  stats$assortativity <- get_assortativity(g, attr)

  # 2. Average degree, strength, transitivity and degree distributions
  g <- get_degree(g, attr)
  g <- get_transitivity(g)
  var <- vertex_attr(g, attr)
  lvls <- unique(var)
  dgs <- list()
  trs <- list()
  strs <- list()
  dg_dist <- list()
  # seq_along() keeps the loop from running when there are no levels.
  for (i in seq_along(lvls)) {
    lvl <- lvls[i]
    lvl_nodes <- V(g)[var == lvl]
    dgs[[lvl]] <- mean(lvl_nodes$degree, na.rm = TRUE)
    # Local transitivity is NaN for some vertices (see the NaN averages in
    # the printed results); na.rm = TRUE leaves those out of the mean.
    trs[[lvl]] <- mean(lvl_nodes$cc, na.rm = TRUE)
    strs[[lvl]] <- mean(lvl_nodes$strength, na.rm = TRUE)
    dg_dist[[lvl]] <- degree.distribution(g, v = lvl_nodes)
  }
  dgs[['all_nodes']] <- mean(V(g)$degree, na.rm = TRUE)
  strs[['all_nodes']] <- mean(V(g)$strength, na.rm = TRUE)
  dg_dist[['all_nodes']] <- degree.distribution(g)

  stats$degree <- dgs
  stats$strength <- strs
  stats$transitivity <- trs
  stats$degree_dist <- dg_dist

  # 3. Confusion matrix at the dyad level
  prop_table <- get_group_confusion(nodes, edges, attr)
  stats$prop_table <- prop_table

  # 3a. Within-group vs. outside-group percentages
  stats$in_out_table <- get_in_out_tbl(get_confusion_within_group(prop_table))

  # Graph-level statistic: number of edges over number of possible edges.
  stats$density <- edge_density(g, loops = FALSE)

  # Count complete triads whose members all belong to the same level.
  print(paste(rep('#', 30), collapse = ''))
  n <- 3
  m <- get_cliques_membership(g, n, attr)

  triad_counts <- list()
  for (i in seq_along(lvls)) {
    lvl <- lvls[i]
    # A clique is homogeneous when all n members fall in this level's column.
    triad_counts[[lvl]] <- sum(m[, lvl] == n)
  }
  stats$triads_counts <- triad_counts

  # Not implemented yet: group blocks / clustering analysis (get_blockmodel()).
  stats
}

# Compute network statistics for the full graph and for cumulative yearly
# snapshots.
#
# g:         igraph graph of the complete network.
# attr:      name (string) of the vertex attribute / node column to group by.
# node_data: node table. Defaults to the variable `nodes` visible where this
#            function is defined (the global workspace in this report),
#            which is what the function always used.
# edge_data: edge table with a `PUB_YEAR` column. Defaults to `edges`, found
#            the same way.
# yr_range:  years to analyse; the snapshot for a year contains every edge
#            with PUB_YEAR <= that year.
# Returns a list with `all_years_stats` (analyze_graph() on `g`) and
# `stats_by_year` (a list keyed by year, one analyze_graph() result each).
# Side effect: prints the statistics of every yearly snapshot.
get_stats <- function(g, attr, node_data = nodes, edge_data = edges,
                      yr_range = 2002:2016) {
  all_years_stats <- analyze_graph(g, node_data, edge_data, attr)

  stats_by_year <- list()
  for (yr in yr_range) {
    # which() drops edges whose PUB_YEAR is NA; plain logical indexing would
    # turn each of them into an all-NA row.
    yr_edges <- edge_data[which(edge_data$PUB_YEAR <= yr), ]
    yr_g <- build_graph(node_data, yr_edges)
    stat <- analyze_graph(yr_g, node_data, yr_edges, attr)
    print(stat)
    stats_by_year[[as.character(yr)]] <- stat
  }

  list(all_years_stats = all_years_stats, stats_by_year = stats_by_year)
}

Plotting Functions

This function plots the Degree Distribution.

# Plot the degree distribution of the whole network and of every group.
#
# stats: result of get_stats(); reads stats$all_years_stats$degree_dist, a
#        named list of degree distributions that includes an 'all_nodes'
#        entry.
# Draws a base-graphics plot of the non-zero frequencies for degrees below
# 20: the overall distribution in black and one randomly coloured line per
# group. Plot margins are restored when the function exits.
plot_deg_dist1 <- function(stats) {
  old_par <- par(mai = c(0.5, 0.5, 0.5, 0.5))
  on.exit(par(old_par), add = TRUE)

  dists <- stats$all_years_stats$degree_dist
  if (is.null(dists[['all_nodes']])) {
    stop("`stats` has no 'all_nodes' degree distribution", call. = FALSE)
  }
  lvls <- names(dists)
  group_lvls <- lvls[lvls != 'all_nodes']
  palette <- distinctColorPalette(length(dists))
  group_cols <- palette[seq_along(group_lvls)]

  # Element i of a degree distribution is the relative frequency of degree
  # i - 1 (the first element belongs to degree 0), hence the "- 1".
  nonzero_points <- function(dist, max_degree = 20) {
    dg <- seq_along(dist) - 1
    keep <- dist != 0 & dg < max_degree
    list(x = dg[keep], y = dist[keep])
  }

  all_pts <- nonzero_points(dists[['all_nodes']])
  plot(x = all_pts$x, y = all_pts$y, pch = 19, cex = 0.5, col = "black",
       xlab = "Degree", ylab = "Frequency", ylim = c(0, 0.2), type = "o",
       lty = 1, lwd = 2)

  for (i in seq_along(group_lvls)) {
    pts <- nonzero_points(dists[[group_lvls[[i]]]])
    lines(x = pts$x, y = pts$y, pch = 19, cex = 0.5, col = group_cols[[i]],
          type = "o", lty = 1, lwd = 2)
  }

  legend(15, 0.17, c('all_nodes', group_lvls), col = c('black', group_cols),
         text.col = c('black', group_cols), lty = 1,
         merge = TRUE, xpd = TRUE, cex = 1)
}

This function plots the confusion matrix as a heat map.

# Draw a confusion table as a heat map.
#
# conf: confusion table with dimensions named Source_Groups / Target_Groups.
# Returns a ggplot object with one tile per cell, shaded from white to
# steel blue by the cell value, with x-axis labels rotated 90 degrees.
plot_block_heatmap <- function(conf) {
  long_conf <- melt(conf)
  tiles <- geom_tile(aes(fill = value), colour = "white")
  shading <- scale_fill_gradient(low = "white", high = "steelblue")
  rotated_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
  ggplot(long_conf, aes(Source_Groups, Target_Groups)) +
    tiles +
    shading +
    rotated_labels
}

This function plots the within and outside group percentages by level of attribute.

# Stacked bar chart of within-group vs. outside-group collaboration.
#
# stats:     result of get_stats(); reads stats$all_years_stats$in_out_table.
# attr_name: label for the group axis.
# Returns a ggplot object with one horizontal bar per group. A fixed set of
# discipline and activity codes, and every code starting with 'RC', is left
# out of the chart.
plot_in_out <- function(stats, attr_name) {
  skipped_disciplines <- c('NULL.', 'PHYS', 'COMP', 'ENGI')
  skipped_activities <- c('DP1', 'P42', 'P51', 'P60', 'R21', 'R33', 'R56',
                          'RL1', 'U01', 'U41', 'UH2', 'UL1', 'UM1')

  plot.data <- stats$all_years_stats$in_out_table %>%
    arrange(Source_Groups, desc(match)) %>%
    # Rows with a missing group are dropped, as a `!=` comparison would do.
    filter(!is.na(Source_Groups),
           !(Source_Groups %in% skipped_disciplines)) %>%
    filter(!(Source_Groups %in% skipped_activities),
           substr(Source_Groups, 1, 2) != 'RC')
  plot.data$Source_Groups <- as.factor(plot.data$Source_Groups)

  bars <- ggplot(data = plot.data,
                 aes(x = Source_Groups, y = in_out_pct, fill = match)) +
    geom_bar(stat = 'identity') +
    coord_flip()
  bars +
    scale_fill_manual(values = c("slategray2", "navyblue")) +
    xlab(attr_name) +
    ylab("Percentage of Group") +
    ggtitle("Percentage of Collaboration Within and Outside of Group")
}

Function to print statistics.

# Print a statistics list (as produced by analyze_graph()) section by section.
#
# all_years_stats: list with the elements assortativity, degree, prop_table,
#                  transitivity and triads_counts.
# Prints the assortativity, the average degree per level, the confusion
# table, the average transitivity per level and the complete-triad count per
# level, each under a banner and followed by two blank lines.
print_stat <- function(all_years_stats) {
  # Section banner: the title framed by 15 '#' characters on each side.
  banner <- function(title) {
    print(paste(c(rep('#', 15), title, rep('#', 15)), collapse = ''))
  }
  # Two empty lines separate the sections.
  section_end <- function() {
    cat('\n')
    cat('\n')
  }

  banner('assortativity')
  print(paste0('assortativity: ', as.character(all_years_stats$assortativity)))
  section_end()

  # Average degree per level
  banner('Degree')
  degree_means <- all_years_stats[['degree']]
  for (lvl in names(degree_means)) {
    print(paste0('Average ', lvl, ' Degree: ',
                 sprintf("%.6f", degree_means[[lvl]])))
  }
  section_end()

  banner('Confusion Table')
  print(all_years_stats$prop_table)
  section_end()

  # Average transitivity per level
  banner('Transitivity')
  print('The below statistics ignore nodes without edges')
  transitivity_means <- all_years_stats[['transitivity']]
  for (lvl in names(transitivity_means)) {
    print(paste0('Average ', lvl, ' Transitivity: ',
                 sprintf("%.6f", transitivity_means[[lvl]])))
  }
  section_end()

  # Complete-triad count per level
  banner('Complete Triads')
  triad_totals <- all_years_stats[['triads_counts']]
  for (lvl in names(triad_totals)) {
    print(paste0('The number of  ', lvl, ' Complete Triads: ',
                 sprintf("%.6f", triad_totals[[lvl]])))
  }
  section_end()
}

Analysis

Load edge and node files. Only include complete nodes and build graph.

# Show the working directory and its contents to confirm the input files are
# where read.csv() will look for them.
getwd()
## [1] "C:/Users/Sebastian Pasotr/Documents/Data_Coding/Social Network Analytics/Week_3"
list.files()
## [1] "6_MainNetworkFunctionsModifiedBySoominCompareOriginalAndReducedNetwork.Rmd"  
## [2] "6_MainNetworkFunctionsModifiedBySoominCompareOriginalAndReducedNetwork_files"
## [3] "drive-download-20200513T105441Z-001.zip"                                     
## [4] "nodes_added_grouped_attributes.csv"                                          
## [5] "study_comp_edges_disc.csv"                                                   
## [6] "study_comp_edges_reduced1.csv"
# Load the node table and keep only the rows that have an Id.
nodes <- read.csv("nodes_added_grouped_attributes.csv", header = TRUE)
nodes <- nodes[complete.cases(nodes$Id), ]
# `edges` is the original network; `edges2` is the reduced network.
edges <- read.csv("study_comp_edges_disc.csv", header = TRUE)
edges2 <- read.csv("study_comp_edges_reduced1.csv", header = TRUE)
g <- build_graph(nodes, edges)

Question 1

# Question 1: overall and per-year statistics grouped by the `Group` attribute.
q1_stats <- get_stats(g, attr = "Group")
## Warning: select_() is deprecated. 
## Please use select() instead
## 
## The 'programming' vignette or the tidyeval book can help you
## to program with select() : https://tidyeval.tidyverse.org
## This warning is displayed once per session.
## [1] "Q1: Does NIGMS researchers collaborate with each other more ofthen than with other groups?"

- original network -

## [1] "###############assortativity###############"
## [1] "assortativity: 0.209681388381732"
## 
## 
## [1] "###############Degree###############"
## [1] "Average comp Degree: 10.942169"
## [1] "Average study Degree: 11.000000"
## [1] "Average all_nodes Degree: 10.973274"
## 
## 
## [1] "###############Confusion Table###############"
##              Target_Groups
## Source_Groups      comp     study
##         comp  0.2644611 0.3927339
##         study           0.3428050
## 
## 
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average comp Transitivity: 0.167929"
## [1] "Average study Transitivity: 0.194331"
## 
## 
## [1] "###############Complete Triads###############"
## [1] "The number of  comp Complete Triads: 83.000000"
## [1] "The number of  study Complete Triads: 200.000000"

- reduced network -

# # reduced network
# q1_all_years_stats2 = q1_stats2[['all_years_stats']] 
# print_stat(q1_all_years_stats2)
# #plot_deg_dist1(q1_all_years_stats)
# plot_block_heatmap(q1_all_years_stats2$prop_table)
# plot_in_out(q1_stats2,"Comparison vs Study Group")

Question 2

# Question 2: overall and per-year statistics grouped by the `ACTIVITY` attribute.
q2_stats <- get_stats(g, attr = "ACTIVITY")
## [1] "Q2 Does the type of grant awarded influence collaboration behavior?"

- original network -

## [1] "###############assortativity###############"
## [1] "assortativity: 0.102341281934154"
## 
## 
## [1] "###############Degree###############"
## [1] "Average P50 Degree: 12.651163"
## [1] "Average P41 Degree: 13.216981"
## [1] "Average P01 Degree: 10.772414"
## [1] "Average R37 Degree: 6.279070"
## [1] "Average P30 Degree: 12.146341"
## [1] "Average U19 Degree: 9.761905"
## [1] "Average U54 Degree: 22.319444"
## [1] "Average R01 Degree: 6.256098"
## [1] "Average P20 Degree: 9.800000"
## [1] "Average R56 Degree: 3.550000"
## [1] "Average R21 Degree: 2.666667"
## [1] "Average P42 Degree: 15.200000"
## [1] "Average UL1 Degree: 1.500000"
## [1] "Average DP1 Degree: 9.000000"
## [1] "Average M01 Degree: 12.869565"
## [1] "Average U41 Degree: 8.500000"
## [1] "Average P51 Degree: 20.000000"
## [1] "Average U01 Degree: 16.944444"
## [1] "Average RC4 Degree: 20.833333"
## [1] "Average RC1 Degree: 7.250000"
## [1] "Average RC2 Degree: 44.625000"
## [1] "Average UH2 Degree: 25.000000"
## [1] "Average RL1 Degree: 1.000000"
## [1] "Average R33 Degree: 0.000000"
## [1] "Average UM1 Degree: 202.000000"
## [1] "Average P60 Degree: 0.000000"
## [1] "Average all_nodes Degree: 10.973274"
## 
## 
## [1] "###############Confusion Table###############"
##              Target_Groups
## Source_Groups          DP1          M01          P01          P20
##           DP1 0.0000000000 0.0000000000 0.0006088898 0.0000000000
##           M01              0.0018266694 0.0109600162 0.0008118531
##           P01                           0.0416074690 0.0040592653
##           P20                                        0.0010148163
##           P30                                                    
##           P41                                                    
##           P42                                                    
##           P50                                                    
##           P51                                                    
##           P60                                                    
##           R01                                                    
##           R21                                                    
##           R33                                                    
##           R37                                                    
##           R56                                                    
##           RC1                                                    
##           RC2                                                    
##           RC4                                                    
##           RL1                                                    
##           U01                                                    
##           U19                                                    
##           U41                                                    
##           U54                                                    
##           UH2                                                    
##           UL1                                                    
##           UM1                                                    
##              Target_Groups
## Source_Groups          P30          P41          P42          P50
##           DP1 0.0010148163 0.0022325959 0.0000000000 0.0000000000
##           M01 0.0012177796 0.0014207428 0.0000000000 0.0154252080
##           P01 0.0182666937 0.0505378526 0.0018266694 0.0087274203
##           P20 0.0192815100 0.0024355592 0.0054800081 0.0022325959
##           P30 0.0010148163 0.0085244571 0.0006088898 0.0018266694
##           P41              0.0259792977 0.0018266694 0.0121777958
##           P42                           0.0002029633 0.0010148163
##           P50                                        0.0056829714
##           P51                                                    
##           P60                                                    
##           R01                                                    
##           R21                                                    
##           R33                                                    
##           R37                                                    
##           R56                                                    
##           RC1                                                    
##           RC2                                                    
##           RC4                                                    
##           RL1                                                    
##           U01                                                    
##           U19                                                    
##           U41                                                    
##           U54                                                    
##           UH2                                                    
##           UL1                                                    
##           UM1                                                    
##              Target_Groups
## Source_Groups          P51          P60          R01          R21
##           DP1 0.0000000000 0.0000000000 0.0012177796 0.0000000000
##           M01 0.0014207428 0.0000000000 0.0054800081 0.0000000000
##           P01 0.0081185305 0.0000000000 0.0322711589 0.0006088898
##           P20 0.0004059265 0.0000000000 0.0028414857 0.0000000000
##           P30 0.0000000000 0.0000000000 0.0131926121 0.0000000000
##           P41 0.0002029633 0.0000000000 0.0819971585 0.0002029633
##           P42 0.0000000000 0.0000000000 0.0006088898 0.0000000000
##           P50 0.0016237061 0.0000000000 0.0117718693 0.0004059265
##           P51 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           P60              0.0000000000 0.0000000000 0.0000000000
##           R01                           0.0466815506 0.0006088898
##           R21                                        0.0000000000
##           R33                                                    
##           R37                                                    
##           R56                                                    
##           RC1                                                    
##           RC2                                                    
##           RC4                                                    
##           RL1                                                    
##           U01                                                    
##           U19                                                    
##           U41                                                    
##           U54                                                    
##           UH2                                                    
##           UL1                                                    
##           UM1                                                    
##              Target_Groups
## Source_Groups          R33          R37          R56          RC1
##           DP1 0.0000000000 0.0004059265 0.0000000000 0.0000000000
##           M01 0.0000000000 0.0006088898 0.0000000000 0.0004059265
##           P01 0.0000000000 0.0186726203 0.0020296326 0.0008118531
##           P20 0.0000000000 0.0006088898 0.0000000000 0.0000000000
##           P30 0.0000000000 0.0085244571 0.0012177796 0.0000000000
##           P41 0.0000000000 0.0219200325 0.0012177796 0.0002029633
##           P42 0.0000000000 0.0014207428 0.0004059265 0.0000000000
##           P50 0.0000000000 0.0052770449 0.0048711183 0.0008118531
##           P51 0.0000000000 0.0018266694 0.0002029633 0.0000000000
##           P60 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R01 0.0000000000 0.0174548407 0.0028414857 0.0010148163
##           R21 0.0000000000 0.0004059265 0.0006088898 0.0002029633
##           R33 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R37              0.0060888979 0.0004059265 0.0018266694
##           R56                           0.0000000000 0.0000000000
##           RC1                                        0.0000000000
##           RC2                                                    
##           RC4                                                    
##           RL1                                                    
##           U01                                                    
##           U19                                                    
##           U41                                                    
##           U54                                                    
##           UH2                                                    
##           UL1                                                    
##           UM1                                                    
##              Target_Groups
## Source_Groups          RC2          RC4          RL1          U01
##           DP1 0.0000000000 0.0034503755 0.0000000000 0.0002029633
##           M01 0.0032474122 0.0000000000 0.0000000000 0.0040592653
##           P01 0.0062918612 0.0028414857 0.0000000000 0.0095392734
##           P20 0.0002029633 0.0004059265 0.0000000000 0.0004059265
##           P30 0.0002029633 0.0000000000 0.0000000000 0.0048711183
##           P41 0.0028414857 0.0024355592 0.0000000000 0.0038563020
##           P42 0.0004059265 0.0000000000 0.0000000000 0.0000000000
##           P50 0.0004059265 0.0034503755 0.0000000000 0.0014207428
##           P51 0.0000000000 0.0000000000 0.0000000000 0.0014207428
##           P60 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R01 0.0030444490 0.0018266694 0.0002029633 0.0044651918
##           R21 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R33 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R37 0.0002029633 0.0042622285 0.0000000000 0.0012177796
##           R56 0.0000000000 0.0002029633 0.0000000000 0.0002029633
##           RC1 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           RC2 0.0249644814 0.0004059265 0.0000000000 0.0032474122
##           RC4              0.0010148163 0.0000000000 0.0002029633
##           RL1                           0.0000000000 0.0000000000
##           U01                                        0.0008118531
##           U19                                                    
##           U41                                                    
##           U54                                                    
##           UH2                                                    
##           UL1                                                    
##           UM1                                                    
##              Target_Groups
## Source_Groups          U19          U41          U54          UH2
##           DP1 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           M01 0.0024355592 0.0000000000 0.0085244571 0.0000000000
##           P01 0.0062918612 0.0004059265 0.0483052567 0.0002029633
##           P20 0.0036533387 0.0000000000 0.0048711183 0.0000000000
##           P30 0.0024355592 0.0002029633 0.0176578039 0.0000000000
##           P41 0.0042622285 0.0000000000 0.0328800487 0.0000000000
##           P42 0.0004059265 0.0000000000 0.0008118531 0.0000000000
##           P50 0.0024355592 0.0002029633 0.0237467018 0.0000000000
##           P51 0.0000000000 0.0000000000 0.0042622285 0.0048711183
##           P60 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R01 0.0030444490 0.0004059265 0.0345037548 0.0000000000
##           R21 0.0000000000 0.0000000000 0.0002029633 0.0000000000
##           R33 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           R37 0.0024355592 0.0000000000 0.0095392734 0.0000000000
##           R56 0.0000000000 0.0000000000 0.0002029633 0.0000000000
##           RC1 0.0004059265 0.0000000000 0.0002029633 0.0000000000
##           RC2 0.0008118531 0.0000000000 0.0012177796 0.0000000000
##           RC4 0.0014207428 0.0000000000 0.0024355592 0.0000000000
##           RL1 0.0000000000 0.0000000000 0.0000000000 0.0000000000
##           U01 0.0026385224 0.0002029633 0.0172518774 0.0050740816
##           U19 0.0028414857 0.0000000000 0.0032474122 0.0000000000
##           U41              0.0010148163 0.0000000000 0.0000000000
##           U54                           0.0403896895 0.0000000000
##           UH2                                        0.0000000000
##           UL1                                                    
##           UM1                                                    
##              Target_Groups
## Source_Groups          UL1          UM1
##           DP1 0.0000000000 0.0000000000
##           M01 0.0000000000 0.0004059265
##           P01 0.0002029633 0.0022325959
##           P20 0.0000000000 0.0000000000
##           P30 0.0000000000 0.0000000000
##           P41 0.0000000000 0.0012177796
##           P42 0.0000000000 0.0002029633
##           P50 0.0002029633 0.0010148163
##           P51 0.0000000000 0.0000000000
##           P60 0.0000000000 0.0000000000
##           R01 0.0000000000 0.0002029633
##           R21 0.0000000000 0.0000000000
##           R33 0.0000000000 0.0000000000
##           R37 0.0000000000 0.0004059265
##           R56 0.0000000000 0.0000000000
##           RC1 0.0000000000 0.0000000000
##           RC2 0.0000000000 0.0000000000
##           RC4 0.0000000000 0.0000000000
##           RL1 0.0000000000 0.0000000000
##           U01 0.0000000000 0.0000000000
##           U19 0.0000000000 0.0000000000
##           U41 0.0000000000 0.0000000000
##           U54 0.0002029633 0.0353156079
##           UH2 0.0000000000 0.0000000000
##           UL1 0.0000000000 0.0000000000
##           UM1              0.0000000000
## 
## 
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average P50 Transitivity: 0.226433"
## [1] "Average P41 Transitivity: 0.171009"
## [1] "Average P01 Transitivity: 0.203538"
## [1] "Average R37 Transitivity: 0.164108"
## [1] "Average P30 Transitivity: 0.187444"
## [1] "Average U19 Transitivity: 0.183554"
## [1] "Average U54 Transitivity: 0.192601"
## [1] "Average R01 Transitivity: 0.162702"
## [1] "Average P20 Transitivity: 0.179750"
## [1] "Average R56 Transitivity: 0.133333"
## [1] "Average R21 Transitivity: 0.111111"
## [1] "Average P42 Transitivity: 0.357381"
## [1] "Average UL1 Transitivity: 0.000000"
## [1] "Average DP1 Transitivity: 0.055556"
## [1] "Average M01 Transitivity: 0.242348"
## [1] "Average U41 Transitivity: 0.000000"
## [1] "Average P51 Transitivity: 0.101299"
## [1] "Average U01 Transitivity: 0.200764"
## [1] "Average RC4 Transitivity: 0.093778"
## [1] "Average RC1 Transitivity: 0.316667"
## [1] "Average RC2 Transitivity: 0.275570"
## [1] "Average UH2 Transitivity: 0.000000"
## [1] "Average RL1 Transitivity: NaN"
## [1] "Average R33 Transitivity: NaN"
## [1] "Average UM1 Transitivity: 0.198830"
## [1] "Average P60 Transitivity: NaN"
## 
## 
## [1] "###############Complete Triads###############"
## [1] "The number of  P50 Complete Triads: 0.000000"
## [1] "The number of  P41 Complete Triads: 7.000000"
## [1] "The number of  P01 Complete Triads: 6.000000"
## [1] "The number of  R37 Complete Triads: 0.000000"
## [1] "The number of  P30 Complete Triads: 0.000000"
## [1] "The number of  U19 Complete Triads: 0.000000"
## [1] "The number of  U54 Complete Triads: 20.000000"
## [1] "The number of  R01 Complete Triads: 2.000000"
## [1] "The number of  P20 Complete Triads: 1.000000"
## [1] "The number of  R56 Complete Triads: 0.000000"
## [1] "The number of  R21 Complete Triads: 0.000000"
## [1] "The number of  P42 Complete Triads: 0.000000"
## [1] "The number of  UL1 Complete Triads: 0.000000"
## [1] "The number of  DP1 Complete Triads: 0.000000"
## [1] "The number of  M01 Complete Triads: 1.000000"
## [1] "The number of  U41 Complete Triads: 0.000000"
## [1] "The number of  P51 Complete Triads: 0.000000"
## [1] "The number of  U01 Complete Triads: 1.000000"
## [1] "The number of  RC4 Complete Triads: 0.000000"
## [1] "The number of  RC1 Complete Triads: 0.000000"
## [1] "The number of  RC2 Complete Triads: 0.000000"
## [1] "The number of  UH2 Complete Triads: 0.000000"
## [1] "The number of  RL1 Complete Triads: 0.000000"
## [1] "The number of  R33 Complete Triads: 0.000000"
## [1] "The number of  UM1 Complete Triads: 0.000000"
## [1] "The number of  P60 Complete Triads: 0.000000"

- reduced network -

Question 3

# Question 3: run get_stats() on graph `g`, grouping by the
# "Act_per_proj_quant" attribute (presumably a vertex attribute -- confirm
# against the get_stats() definition). As the printed output shows, the call
# reports assortativity, average degree per group, a confusion table,
# per-group transitivity, and complete-triad counts. The returned value is
# kept in `q3_stats`.
q3_stats <- get_stats(g, attr = "Act_per_proj_quant")
## [1] "Q3 Does variety in funding sources influence collaboration behavior?"

- original network -

## [1] "###############assortativity###############"
## [1] "assortativity: -0.0360225782131399"
## 
## 
## [1] "###############Degree###############"
## [1] "Average 3 Degree: 18.588235"
## [1] "Average 2 Degree: 11.263636"
## [1] "Average 5 Degree: 8.666667"
## [1] "Average 4 Degree: 12.750000"
## [1] "Average 1 Degree: 9.251337"
## [1] "Average all_nodes Degree: 10.973274"
## 
## 
## [1] "###############Confusion Table###############"
##              Target_Groups
## Source_Groups           1           2           3           4           5
##             1 0.037548204 0.110817942 0.023543739 0.102293485 0.039374873
##             2             0.135782423 0.060888979 0.208443272 0.102699411
##             3                         0.003247412 0.025776334 0.011568906
##             4                                     0.032271159 0.085447534
##             5                                                 0.020296326
## 
## 
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average 3 Transitivity: 0.151307"
## [1] "Average 2 Transitivity: 0.182789"
## [1] "Average 5 Transitivity: 0.246930"
## [1] "Average 4 Transitivity: 0.190784"
## [1] "Average 1 Transitivity: 0.128057"
## 
## 
## [1] "###############Complete Triads###############"
## [1] "The number of  3 Complete Triads: 1.000000"
## [1] "The number of  2 Complete Triads: 43.000000"
## [1] "The number of  5 Complete Triads: 1.000000"
## [1] "The number of  4 Complete Triads: 4.000000"
## [1] "The number of  1 Complete Triads: 5.000000"

- reduced network -

Question 4

# Question 4: run get_stats() on graph `g`, grouping by the
# "discipline_classification" attribute (presumably a vertex attribute --
# confirm against the get_stats() definition). As the printed output shows,
# the call reports assortativity, average degree per discipline, a confusion
# table, per-discipline transitivity, and complete-triad counts. The returned
# value is kept in `q4_stats`.
q4_stats <- get_stats(g, attr = "discipline_classification")
## [1] "Q4 Does the scientific discipline influence collaboration behavior?"

- original network -

## [1] "###############assortativity###############"
## [1] "assortativity: 0.203262568923209"
## 
## 
## [1] "###############Degree###############"
## [1] "Average AGRI Degree: 8.735849"
## [1] "Average CHEM Degree: 16.690909"
## [1] "Average PHAR Degree: 14.170732"
## [1] "Average IMMU Degree: 10.343066"
## [1] "Average MULT Degree: 14.274648"
## [1] "Average NEUR Degree: 5.623853"
## [1] "Average MEDI Degree: 13.173077"
## [1] "Average CENG Degree: 12.935484"
## [1] "Average NULL. Degree: 1.000000"
## [1] "Average BIOC Degree: 8.470588"
## [1] "Average COMP Degree: 3.000000"
## [1] "Average PHYS Degree: 21.600000"
## [1] "Average ENGI Degree: 0.000000"
## [1] "Average all_nodes Degree: 10.973274"
## 
## 
## [1] "###############Confusion Table###############"
##              Target_Groups
## Source_Groups         AGRI         BIOC         CENG         CHEM
##         AGRI  0.0367363507 0.0310533793 0.0107570530 0.0024355592
##         BIOC               0.0117718693 0.0089303836 0.0144103917
##         CENG                            0.0152222448 0.0487111833
##         CHEM                                         0.0288207834
##         COMP                                                     
##         ENGI                                                     
##         IMMU                                                     
##         MEDI                                                     
##         MULT                                                     
##         NEUR                                                     
##         NULL.                                                    
##         PHAR                                                     
##         PHYS                                                     
##              Target_Groups
## Source_Groups         COMP         ENGI         IMMU         MEDI
##         AGRI  0.0000000000 0.0000000000 0.0426222854 0.0345037548
##         BIOC  0.0000000000 0.0000000000 0.0172518774 0.0028414857
##         CENG  0.0000000000 0.0000000000 0.0107570530 0.0038563020
##         CHEM  0.0004059265 0.0000000000 0.0178607672 0.0028414857
##         COMP  0.0000000000 0.0000000000 0.0000000000 0.0006088898
##         ENGI               0.0000000000 0.0000000000 0.0000000000
##         IMMU                            0.0444489547 0.0184696570
##         MEDI                                         0.0200933631
##         MULT                                                     
##         NEUR                                                     
##         NULL.                                                    
##         PHAR                                                     
##         PHYS                                                     
##              Target_Groups
## Source_Groups         MULT         NEUR        NULL.         PHAR
##         AGRI  0.0483052567 0.0160340978 0.0002029633 0.0225289223
##         BIOC  0.0263852243 0.0125837223 0.0002029633 0.0083214938
##         CENG  0.0170489141 0.0024355592 0.0000000000 0.0298355998
##         CHEM  0.0221229957 0.0048711183 0.0000000000 0.0123807591
##         COMP  0.0002029633 0.0000000000 0.0000000000 0.0000000000
##         ENGI  0.0000000000 0.0000000000 0.0000000000 0.0000000000
##         IMMU  0.0523645220 0.0089303836 0.0004059265 0.0294296732
##         MEDI  0.0089303836 0.0135985387 0.0000000000 0.0123807591
##         MULT  0.0870712401 0.0336919018 0.0000000000 0.0239496651
##         NEUR               0.0105540897 0.0000000000 0.0111629795
##         NULL.                           0.0000000000 0.0006088898
##         PHAR                                         0.0426222854
##         PHYS                                                     
##              Target_Groups
## Source_Groups         PHYS
##         AGRI  0.0000000000
##         BIOC  0.0006088898
##         CENG  0.0000000000
##         CHEM  0.0026385224
##         COMP  0.0000000000
##         ENGI  0.0000000000
##         IMMU  0.0006088898
##         MEDI  0.0008118531
##         MULT  0.0042622285
##         NEUR  0.0000000000
##         NULL. 0.0000000000
##         PHAR  0.0000000000
##         PHYS  0.0064948244
## 
## 
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average AGRI Transitivity: 0.159806"
## [1] "Average CHEM Transitivity: 0.188649"
## [1] "Average PHAR Transitivity: 0.215610"
## [1] "Average IMMU Transitivity: 0.188178"
## [1] "Average MULT Transitivity: 0.161461"
## [1] "Average NEUR Transitivity: 0.133601"
## [1] "Average MEDI Transitivity: 0.337125"
## [1] "Average CENG Transitivity: 0.186553"
## [1] "Average NULL. Transitivity: 0.500000"
## [1] "Average BIOC Transitivity: 0.141474"
## [1] "Average COMP Transitivity: 0.833333"
## [1] "Average PHYS Transitivity: 0.274074"
## [1] "Average ENGI Transitivity: NaN"
## 
## 
## [1] "###############Complete Triads###############"
## [1] "The number of  AGRI Complete Triads: 3.000000"
## [1] "The number of  CHEM Complete Triads: 4.000000"
## [1] "The number of  PHAR Complete Triads: 5.000000"
## [1] "The number of  IMMU Complete Triads: 15.000000"
## [1] "The number of  MULT Complete Triads: 32.000000"
## [1] "The number of  NEUR Complete Triads: 0.000000"
## [1] "The number of  MEDI Complete Triads: 2.000000"
## [1] "The number of  CENG Complete Triads: 5.000000"
## [1] "The number of  NULL. Complete Triads: 0.000000"
## [1] "The number of  BIOC Complete Triads: 6.000000"
## [1] "The number of  COMP Complete Triads: 0.000000"
## [1] "The number of  PHYS Complete Triads: 0.000000"
## [1] "The number of  ENGI Complete Triads: 0.000000"

- reduced network -