These are the main functions used to create and analyze our network.
First, install and load the following packages:
library(igraph)
## Warning: package 'igraph' was built under R version 3.6.3
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
library(sna)
## Warning: package 'sna' was built under R version 3.6.3
## Loading required package: statnet.common
## Warning: package 'statnet.common' was built under R version 3.6.3
##
## Attaching package: 'statnet.common'
## The following object is masked from 'package:base':
##
## order
## Loading required package: network
## Warning: package 'network' was built under R version 3.6.3
## network: Classes for Relational Data
## Version 1.16.0 created on 2019-11-30.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## Mark S. Handcock, University of California -- Los Angeles
## David R. Hunter, Penn State University
## Martina Morris, University of Washington
## Skye Bender-deMoll, University of Washington
## For citation information, type citation("network").
## Type help("network-package") to get started.
##
## Attaching package: 'network'
## The following objects are masked from 'package:igraph':
##
## %c%, %s%, add.edges, add.vertices, delete.edges,
## delete.vertices, get.edge.attribute, get.edges,
## get.vertex.attribute, is.bipartite, is.directed,
## list.edge.attributes, list.vertex.attributes,
## set.edge.attribute, set.vertex.attribute
## sna: Tools for Social Network Analysis
## Version 2.5 created on 2019-12-09.
## copyright (c) 2005, Carter T. Butts, University of California-Irvine
## For citation information, type citation("sna").
## Type help(package="sna") to get started.
##
## Attaching package: 'sna'
## The following objects are masked from 'package:igraph':
##
## betweenness, bonpow, closeness, components, degree,
## dyad.census, evcent, hierarchy, is.connected, neighborhood,
## triad.census
library(intergraph)
## Warning: package 'intergraph' was built under R version 3.6.3
library(RColorBrewer)
library(randomcoloR)
## Warning: package 'randomcoloR' was built under R version 3.6.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(caret)
## Warning: package 'caret' was built under R version 3.6.3
## Loading required package: lattice
library(ggplot2)
library(reshape2)
Load Functions
The following function builds the graph and attaches all of the node-level and edge-level attributes that are found in, or derived from, the data provided.
# Build an undirected igraph graph from a node table and an edge table.
#
# Args:
#   nodes: data frame with one row per vertex. `Id` supplies the vertex names;
#          the other columns read below are copied onto the vertices.
#   edges: data frame with one row per edge. `Source` and `Target` hold vertex
#          names; the other columns read below are copied onto the edges.
#
# Returns:
#   An igraph graph with nrow(nodes) vertices and one edge per row of `edges`.
#   Every attribute except the edge `weight` is stored as character.
build_graph <- function(nodes, edges) {
  g <- igraph::make_empty_graph(n = nrow(nodes), directed = FALSE)

  # Convert R factors to character before storing them on the vertices.
  V(g)$name <- as.character(nodes$Id)
  V(g)$ACTIVITY <- as.character(nodes$ACTIVITY) # used for Q2
  V(g)$pi_key <- as.character(nodes$pi_key)
  V(g)$discipline_classification <-
    as.character(nodes$discipline_classification) # used for Q4

  # add_edges() takes a flat vector: odd index -> source, even index -> target.
  edge_list <- as.vector(
    t(cbind(as.character(edges$Source), as.character(edges$Target)))
  )
  g <- igraph::add_edges(g, edge_list)

  E(g)$Label <- as.character(edges$Label)
  E(g)$PUB_YEAR <- as.character(edges$PUB_YEAR)
  E(g)$COUNTRY <- as.character(edges$COUNTRY)
  E(g)$ACTIVITY <- as.character(edges$ACTIVITY)
  E(g)$PMID <- as.character(edges$PMID_x)
  # Edge weight used for weighted degree in Q3. For Q4 the alternative was:
  # E(g)$weight <- edges$normalized_discipline_weight
  E(g)$weight <- edges$normalized_activity_weight
  E(g)$discipline <- as.character(edges$discipline_classification)

  V(g)$Group <- as.character(nodes$Group) # used for Q1
  V(g)$Unique_activities <- as.character(nodes$unique_activities)
  V(g)$Project_count <- as.character(nodes$project_count)
  V(g)$Act_per_proj_quant <- as.character(nodes$Act_per_proj_quant) # used for Q3
  V(g)$Activity_per_project <- as.character(nodes$activity_per_project)
  V(g)$discipline.group <- as.character(nodes$discipline.group)
  # Same source column as `Unique_activities` above, stored under a second name.
  V(g)$unique_activities <- as.character(nodes$unique_activities)
  V(g)$unique_activity_grouped <- as.character(nodes$unique_activity_grouped)

  # Disabled: remove nodes by degree
  # g <- filter_node_by_number_publication(g, num_pub_floor)
  g
}
The next functions are essential for calculating the network level statistics.
This function calculates assortativity, which is a single statistic measuring the degree of connections between similar nodes.
# Nominal assortativity of the graph with respect to one vertex attribute.
#
# Args:
#   g:    igraph graph.
#   attr: name of the vertex attribute whose values define the node categories.
#
# Returns:
#   The value returned by igraph::assortativity_nominal(), treating the graph
#   as undirected.
get_assortativity <- function(g, attr) {
  vertex_values <- igraph::vertex_attr(g, attr)
  # Vertex values are stored as character; assortativity_nominal() works on
  # integer category codes, so convert through a factor.
  type_codes <- as.integer(as.factor(vertex_values))
  igraph::assortativity_nominal(g, type_codes, directed = FALSE)
}
The next function attaches each node's degree (number of connections) and strength (weighted degree) to the graph as vertex attributes; these values are later averaged for each level of the attribute specified.
# Store every vertex's degree and strength on the graph.
#
# Args:
#   g:    igraph graph.
#   attr: not used by this function; kept so existing callers keep working.
#
# Returns:
#   `g` with two added vertex attributes, `degree` and `strength`.
get_degree <- function(g, attr) {
  # igraph:: is required here: sna masks degree() (see the attach messages).
  V(g)$degree <- igraph::degree(g)
  V(g)$strength <- igraph::strength(g)
  g
}
The next function calculates a confusion matrix: for each pair of levels of a particular attribute, the proportion of all collaborations (edges) that connect a node at one level to a node at the other.
# Calculate a confusion (mixing) table between groups.
#
# Each edge is labelled with the `attr` value of its source node and of its
# target node, and the edges are cross-tabulated as proportions of all edges.
#
# Args:
#   nodes: data frame with an `Id` column and a column named by `attr`.
#   edges: data frame with `Source` and `Target` columns holding node ids.
#   attr:  name of the node column to group by.
#
# Returns:
#   A square table (dimnames `Source_Groups` x `Target_Groups`) holding the
#   upper triangle of the symmetrised proportions; the lower triangle is
#   blanked by sna::lower.tri.remove().
get_group_confusion <- function(nodes, edges, attr) {
  from_to <- dplyr::select(edges, Source, Target)
  from_to$Source <- as.character(from_to$Source)
  from_to$Target <- as.character(from_to$Target)
  nodes$Id <- as.character(nodes$Id)

  # Joining the node table twice makes the node columns collide, so the
  # source-side copy gets the suffix ".x" and the target-side copy ".y".
  from_to <- dplyr::left_join(from_to, nodes, by = c("Source" = "Id"))
  from_to <- dplyr::left_join(from_to, nodes, by = c("Target" = "Id"))
  x <- paste0(attr, ".x")
  y <- paste0(attr, ".y")
  if (!all(c(x, y) %in% colnames(from_to))) {
    stop("Attribute '", attr, "' was not found in `nodes`.", call. = FALSE)
  }
  source_values <- from_to[[x]]
  target_values <- from_to[[y]]

  # Both factors must share one level set; otherwise the table is not square
  # (or is square but misaligned) and adding its transpose is wrong.
  if (is.factor(source_values) || is.factor(target_values)) {
    group_levels <- base::union(
      levels(as.factor(source_values)),
      levels(as.factor(target_values))
    )
  } else {
    group_levels <- levels(as.factor(c(source_values, target_values)))
  }
  tbl <- table(
    Source_Groups = factor(as.character(source_values), levels = group_levels),
    Target_Groups = factor(as.character(target_values), levels = group_levels)
  )
  prop_table <- prop.table(tbl)

  # The graph is undirected, so fold (a, b) and (b, a) together and keep only
  # the upper triangle. The fold doubles the diagonal, hence the halving.
  folded <- prop_table + t(prop_table)
  upper <- sna::lower.tri.remove(folded)
  diag(upper) <- diag(upper) / 2
  upper
}
The next function calculates the transitivity, which measures how tightly knit a community is.
# Attach the local transitivity of every vertex to the graph as the vertex
# attribute `cc`, and return the updated graph.
get_transitivity <- function(g) {
  vertex_names <- V(g)$name
  V(g)$cc <- transitivity(g, vids = vertex_names, type = "local")
  g
}
The following function counts the number of cliques with n connected nodes in the network. For our analysis, we used this function to count complete triads.
# Count, for every clique of exactly `n` vertices, how many of its members
# belong to each level of a vertex attribute.
#
# Args:
#   g:    igraph graph.
#   n:    clique size to look for (3 gives complete triads).
#   attr: name of the vertex attribute that defines the groups.
#
# Returns:
#   A numeric matrix with one row per clique and one column per distinct
#   attribute value; cell [i, k] is the number of members of clique i that are
#   in group k. The matrix has zero rows when there is no clique of size n.
get_cliques_membership <- function(g, n, attr) {
  # Request only cliques of size n, rather than listing every larger clique
  # and discarding it afterwards.
  size_n_cliques <- igraph::cliques(g, min = n, max = n)
  groups <- unique(igraph::vertex_attr(g, attr))

  membership <- matrix(0, nrow = length(size_n_cliques), ncol = length(groups))
  colnames(membership) <- groups

  # seq_along() keeps the loop from running when no clique was found.
  for (i in seq_along(size_n_cliques)) {
    member_groups <- igraph::vertex_attr(g, attr, size_n_cliques[[i]])
    # Count members per group by position, so numeric group labels are not
    # mistaken for column indices.
    membership[i, ] <- tabulate(
      match(member_groups, groups),
      nbins = length(groups)
    )
  }
  membership
}
This function takes the confusion matrix and rescales its percentages so that each one is a proportion of the collaborations involving a particular level of an attribute, rather than a proportion of all collaborations in the network.
# Convert an upper-triangular confusion table to long format and express each
# cell as a share of its source group's total.
#
# Args:
#   prop_table: square table with dimnames `Source_Groups` and `Target_Groups`
#               whose lower triangle has been blanked.
#
# Returns:
#   A data frame with character columns `Source_Groups` and `Target_Groups`,
#   the symmetrised proportion `Freq`, and `percent_within`, which is `Freq`
#   divided by the sum of `Freq` over all rows with the same `Source_Groups`.
get_confusion_within_group <- function(prop_table) {
  conf_mtx <- prop_table
  # Mirror the upper triangle into the lower one so each group's row is whole.
  conf_mtx[lower.tri(conf_mtx)] <- t(conf_mtx)[lower.tri(conf_mtx)]

  conf_mtx <- data.frame(conf_mtx)
  stopifnot(
    all(c("Source_Groups", "Target_Groups", "Freq") %in% names(conf_mtx))
  )
  conf_mtx$Source_Groups <- as.character(conf_mtx$Source_Groups)
  conf_mtx$Target_Groups <- as.character(conf_mtx$Target_Groups)

  if (nrow(conf_mtx) == 0) {
    conf_mtx$percent_within <- numeric(0)
    return(conf_mtx)
  }

  # One vectorised pass: total per source group, aligned row by row.
  freq <- as.numeric(conf_mtx$Freq)
  group_totals <- ave(freq, conf_mtx$Source_Groups, FUN = sum)
  conf_mtx$percent_within <- freq / group_totals
  conf_mtx
}
The next function takes the output of the previous function to calculate the within and outside group collaboration for levels of a particular attribute.
# Summarise within-group versus outside-group collaboration per source group.
#
# Args:
#   conf_mtx: data frame with columns `Source_Groups`, `Target_Groups` and
#             `percent_within` (the output of get_confusion_within_group()).
#
# Returns:
#   One row per (`Source_Groups`, `match`) pair, where `match` is
#   "within group" when source and target groups are equal and
#   "outside group" otherwise, and `in_out_pct` is the summed `percent_within`.
get_in_out_tbl <- function(conf_mtx) {
  conf_mtx %>%
    dplyr::mutate(
      match = ifelse(
        Source_Groups == Target_Groups, "within group", "outside group"
      ),
      # Missing shares count as zero so they do not turn the sums into NA.
      percent_within = ifelse(is.na(percent_within), 0, percent_within)
    ) %>%
    dplyr::group_by(Source_Groups, match) %>%
    dplyr::summarise(in_out_pct = sum(percent_within))
}
The following functions use the utility functions listed above and get network statistics by year and overall.
# Compute the network statistics for one graph, grouped by a vertex attribute.
#
# Args:
#   g:     igraph graph built by build_graph().
#   nodes: node data frame the graph was built from.
#   edges: edge data frame the graph was built from.
#   attr:  name of the vertex attribute / node column to group by
#          (e.g. "Group", "ACTIVITY", "discipline_classification").
#
# Returns:
#   A list with elements `assortativity`, `degree`, `strength`, `transitivity`,
#   `degree_dist`, `prop_table`, `in_out_table`, `density` and `triads_counts`.
#   `degree`, `strength` and `degree_dist` also carry an "all_nodes" entry.
#
# Side effects: prints `attr` and a separator line of '#' characters.
analyze_graph <- function(g, nodes, edges, attr) {
  stats <- list()
  print(attr)

  # 1. Assortativity
  stats$assortativity <- get_assortativity(g, attr)

  # 2. Average degree, strength, transitivity and degree distributions
  g <- get_degree(g, attr)
  g <- get_transitivity(g)
  var <- vertex_attr(g, attr)
  lvls <- unique(var)
  dgs <- list()
  trs <- list()
  strs <- list()
  dg_dist <- list()
  for (i in seq_along(lvls)) {
    lvl <- lvls[i]
    lvl_nodes <- V(g)[var == lvl]
    dgs[[lvl]] <- mean(lvl_nodes$degree, na.rm = TRUE)
    # Isolated vertices have NaN local transitivity; na.rm skips them.
    trs[[lvl]] <- mean(lvl_nodes$cc, na.rm = TRUE)
    strs[[lvl]] <- mean(lvl_nodes$strength, na.rm = TRUE)
    dg_dist[[lvl]] <- igraph::degree_distribution(g, v = lvl_nodes)
  }
  dgs[["all_nodes"]] <- mean(V(g)$degree, na.rm = TRUE)
  strs[["all_nodes"]] <- mean(V(g)$strength, na.rm = TRUE)
  dg_dist[["all_nodes"]] <- igraph::degree_distribution(g)
  stats$degree <- dgs
  stats$strength <- strs
  stats$transitivity <- trs
  stats$degree_dist <- dg_dist

  # 3. Confusion matrix at the dyad level
  prop_table <- get_group_confusion(nodes, edges, attr)
  stats$prop_table <- prop_table

  # 3a. Within-group vs outside-group percentages
  stats$in_out_table <- get_in_out_tbl(get_confusion_within_group(prop_table))

  # Graph-level statistic: number of edges over number of possible edges.
  stats$density <- edge_density(g, loops = FALSE)

  # Count complete triads whose members all belong to the same group.
  print(paste(rep("#", 30), collapse = ""))
  n <- 3
  m <- get_cliques_membership(g, n, attr)
  triad_counts <- list()
  for (i in seq_along(lvls)) {
    lvl <- lvls[i]
    # A clique is homogeneous when all n of its members are in this group.
    triad_counts[[lvl]] <- sum(m[, lvl] == n)
  }
  stats$triads_counts <- triad_counts

  # Not implemented: group blocks / clustering analysis (get_blockmodel()).
  stats
}
# Compute statistics for the whole network and for the cumulative network up
# to each year.
#
# Args:
#   g:       igraph graph for the complete network.
#   attr:    name of the vertex attribute / node column to group by.
#   years:   years to step through; the graph for a year holds every edge
#            whose PUB_YEAR is less than or equal to that year.
#   node_df: node data frame. Defaults to the `nodes` object visible where
#            this function was defined, which is what earlier versions read.
#   edge_df: edge data frame. Defaults to the `edges` object in the same way.
#
# Returns:
#   A list with `all_years_stats` (analyze_graph() on `g`) and
#   `stats_by_year` (a list keyed by year, one analyze_graph() result each).
#
# Side effects: prints each year's statistics.
get_stats <- function(g, attr, years = 2002:2016,
                      node_df = nodes, edge_df = edges) {
  all_years_stats <- analyze_graph(g, node_df, edge_df, attr)

  stats_by_year <- list()
  for (yr in years) {
    # which() drops edges with a missing PUB_YEAR; a bare logical index would
    # turn them into all-NA rows.
    yr_edges <- edge_df[which(edge_df$PUB_YEAR <= yr), , drop = FALSE]
    yr_g <- build_graph(node_df, yr_edges)
    stat <- analyze_graph(yr_g, node_df, yr_edges, attr)
    print(stat)
    stats_by_year[[as.character(yr)]] <- stat
  }

  list(all_years_stats = all_years_stats, stats_by_year = stats_by_year)
}
Plotting Functions
This function plots the Degree Distribution.
# Plot the degree distribution of the whole network (black) and of each
# attribute level (one colour per level), for degrees below 20.
#
# Args:
#   stats: list returned by get_stats(); `stats$all_years_stats$degree_dist`
#          must hold an "all_nodes" entry plus one entry per level.
#
# Side effects: draws on the current graphics device. The margin setting is
# restored when the function exits.
plot_deg_dist1 <- function(stats) {
  old_par <- par(mai = c(0.5, 0.5, 0.5, 0.5))
  on.exit(par(old_par), add = TRUE)

  dists <- stats$all_years_stats$degree_dist
  colours <- randomcoloR::distinctColorPalette(length(dists))
  degree_cap <- 20 # only degrees below this value are drawn

  # The first element of a degree distribution is the share of degree-0
  # vertices, so position k corresponds to degree k - 1.
  points_to_draw <- function(dist) {
    degrees <- seq_along(dist) - 1
    keep <- dist != 0 & degrees < degree_cap
    list(x = degrees[keep], y = dist[keep])
  }

  all_pts <- points_to_draw(dists[["all_nodes"]])
  plot(
    x = all_pts$x, y = all_pts$y, pch = 19, cex = 0.5, col = "black",
    xlab = "Degree", ylab = "Frequency", ylim = c(0, 0.2),
    type = "o", lty = 1, lwd = 2
  )

  # Pair every level with its colour once, so the curves and the legend agree
  # wherever "all_nodes" sits in the list.
  lvls <- base::setdiff(names(dists), "all_nodes")
  lvl_colours <- colours[seq_along(lvls)]
  for (i in seq_along(lvls)) {
    pts <- points_to_draw(dists[[lvls[i]]])
    lines(
      x = pts$x, y = pts$y, pch = 19, cex = 0.5, col = lvl_colours[i],
      type = "o", lty = 1, lwd = 2
    )
  }

  legend(
    15, 0.17, c("all_nodes", lvls),
    col = c("black", lvl_colours),
    text.col = c("black", lvl_colours),
    lty = 1, merge = TRUE, xpd = TRUE, cex = 1
  )
}
This function plots the confusion matrix as a heat map.
# Draw the confusion table as a heat map: one tile per (source group, target
# group) pair, shaded from white to steel blue by its value.
plot_block_heatmap <- function(conf) {
  long_conf <- melt(conf)
  tiles <- geom_tile(aes(fill = value), colour = "white")
  shading <- scale_fill_gradient(low = "white", high = "steelblue")
  # Rotate the x-axis labels so long group names do not overlap.
  rotated_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))
  ggplot(long_conf, aes(Source_Groups, Target_Groups)) +
    tiles +
    shading +
    rotated_labels
}
This function plots the within and outside group percentages by level of attribute.
# Stacked horizontal bar chart of within-group vs outside-group collaboration
# for each level of an attribute.
#
# Args:
#   stats:            list returned by get_stats(); uses
#                     `stats$all_years_stats$in_out_table`.
#   attr_name:        axis label for the attribute being plotted.
#   exclude_groups:   group labels to leave out of the chart. The default is
#                     the set that was previously hard-coded.
#   exclude_prefixes: groups whose label starts with any of these strings are
#                     left out as well (default "RC").
#
# Returns:
#   A ggplot object.
plot_in_out <- function(stats, attr_name,
                        exclude_groups = c(
                          "NULL.", "PHYS", "COMP", "ENGI",
                          "DP1", "P42", "P51", "P60", "R21", "R33", "R56",
                          "RL1", "U01", "U41", "UH2", "UL1", "UM1"
                        ),
                        exclude_prefixes = "RC") {
  # TRUE for every label that starts with one of the excluded prefixes.
  has_excluded_prefix <- function(labels) {
    labels <- as.character(labels)
    hit <- rep(FALSE, length(labels))
    for (prefix in exclude_prefixes) {
      hit <- hit | startsWith(labels, prefix)
    }
    hit
  }

  plot_data <- stats$all_years_stats$in_out_table %>%
    dplyr::arrange(Source_Groups, dplyr::desc(match)) %>%
    dplyr::filter(
      # Rows with a missing group were dropped by the earlier `!=` tests too.
      !is.na(Source_Groups),
      !(Source_Groups %in% exclude_groups),
      !has_excluded_prefix(Source_Groups)
    )
  plot_data$Source_Groups <- as.factor(plot_data$Source_Groups)

  ggplot(data = plot_data, aes(x = Source_Groups, y = in_out_pct, fill = match)) +
    geom_bar(stat = "identity") +
    coord_flip() +
    scale_fill_manual(values = c("slategray2", "navyblue")) +
    xlab(attr_name) +
    ylab("Percentage of Group") +
    ggtitle("Percentage of Collaboration Within and Outside of Group")
}
Function to print statistics.
# Print a text report of one analyze_graph() result: assortativity, average
# degree per level, the confusion table, average transitivity per level and
# the complete-triad count per level. Sections are separated by two blank
# lines.
print_stat <- function(all_years_stats) {
  hashes <- rep("#", 15)

  # Section title framed by 15 '#' characters on each side.
  show_banner <- function(title) {
    print(paste(c(hashes, title, hashes), collapse = ""))
  }

  # Two blank lines between sections.
  skip_two_lines <- function() {
    cat("\n")
    cat("\n")
  }

  # One line per named entry, with the value shown to six decimals.
  show_each <- function(values, lead, label) {
    for (key in names(values)) {
      print(paste0(lead, key, label, sprintf("%.6f", values[[key]])))
    }
  }

  show_banner("assortativity")
  print(paste0("assortativity: ", as.character(all_years_stats$assortativity)))
  skip_two_lines()

  show_banner("Degree")
  show_each(all_years_stats[["degree"]], "Average ", " Degree: ")
  skip_two_lines()

  show_banner("Confusion Table")
  print(all_years_stats$prop_table)
  skip_two_lines()

  show_banner("Transitivity")
  print("The below statistics ignore nodes without edges")
  show_each(all_years_stats[["transitivity"]], "Average ", " Transitivity: ")
  skip_two_lines()

  show_banner("Complete Triads")
  show_each(
    all_years_stats[["triads_counts"]],
    "The number of ", " Complete Triads: "
  )
  skip_two_lines()
}
Analysis
Load edge and node files. Only include complete nodes and build graph.
# Show the working directory; the CSV files below are read relative to it.
getwd()
## [1] "C:/Users/Sebastian Pasotr/Documents/Data_Coding/Social Network Analytics/Week_3"
# List the files in the working directory to confirm the inputs are present.
list.files()
## [1] "6_MainNetworkFunctionsModifiedBySoominCompareOriginalAndReducedNetwork.Rmd"
## [2] "6_MainNetworkFunctionsModifiedBySoominCompareOriginalAndReducedNetwork_files"
## [3] "drive-download-20200513T105441Z-001.zip"
## [4] "nodes_added_grouped_attributes.csv"
## [5] "study_comp_edges_disc.csv"
## [6] "study_comp_edges_reduced1.csv"
# Read the node table and keep only the rows that have an Id.
nodes <- read.csv("nodes_added_grouped_attributes.csv", header = TRUE)
has_id <- complete.cases(nodes$Id)
nodes <- nodes[has_id, ]
# Read the two edge tables; `edges2` is not used by the code shown below.
edges <- read.csv("study_comp_edges_disc.csv", header = TRUE)
edges2 <- read.csv("study_comp_edges_reduced1.csv", header = TRUE)
# Build the network from the node table and the first edge table.
g <- build_graph(nodes, edges)
Question 1
# Question 1: statistics grouped by the `Group` vertex attribute.
q1_stats <- get_stats(g, attr = "Group")
## Warning: select_() is deprecated.
## Please use select() instead
##
## The 'programming' vignette or the tidyeval book can help you
## to program with select() : https://tidyeval.tidyverse.org
## This warning is displayed once per session.
## [1] "Q1: Does NIGMS researchers collaborate with each other more ofthen than with other groups?"
- original network -
## [1] "###############assortativity###############"
## [1] "assortativity: 0.209681388381732"
##
##
## [1] "###############Degree###############"
## [1] "Average comp Degree: 10.942169"
## [1] "Average study Degree: 11.000000"
## [1] "Average all_nodes Degree: 10.973274"
##
##
## [1] "###############Confusion Table###############"
## Target_Groups
## Source_Groups comp study
## comp 0.2644611 0.3927339
## study 0.3428050
##
##
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average comp Transitivity: 0.167929"
## [1] "Average study Transitivity: 0.194331"
##
##
## [1] "###############Complete Triads###############"
## [1] "The number of comp Complete Triads: 83.000000"
## [1] "The number of study Complete Triads: 200.000000"
- reduced network -
# # reduced network
# q1_all_years_stats2 = q1_stats2[['all_years_stats']]
# print_stat(q1_all_years_stats2)
# #plot_deg_dist1(q1_all_years_stats)
# plot_block_heatmap(q1_all_years_stats2$prop_table)
# plot_in_out(q1_stats2,"Comparison vs Study Group")
Question 2
# Question 2: statistics grouped by the `ACTIVITY` vertex attribute.
q2_stats <- get_stats(g, attr = "ACTIVITY")
## [1] "Q2 Does the type of grant awarded influence collaboration behavior?"
- original network -
## [1] "###############assortativity###############"
## [1] "assortativity: 0.102341281934154"
##
##
## [1] "###############Degree###############"
## [1] "Average P50 Degree: 12.651163"
## [1] "Average P41 Degree: 13.216981"
## [1] "Average P01 Degree: 10.772414"
## [1] "Average R37 Degree: 6.279070"
## [1] "Average P30 Degree: 12.146341"
## [1] "Average U19 Degree: 9.761905"
## [1] "Average U54 Degree: 22.319444"
## [1] "Average R01 Degree: 6.256098"
## [1] "Average P20 Degree: 9.800000"
## [1] "Average R56 Degree: 3.550000"
## [1] "Average R21 Degree: 2.666667"
## [1] "Average P42 Degree: 15.200000"
## [1] "Average UL1 Degree: 1.500000"
## [1] "Average DP1 Degree: 9.000000"
## [1] "Average M01 Degree: 12.869565"
## [1] "Average U41 Degree: 8.500000"
## [1] "Average P51 Degree: 20.000000"
## [1] "Average U01 Degree: 16.944444"
## [1] "Average RC4 Degree: 20.833333"
## [1] "Average RC1 Degree: 7.250000"
## [1] "Average RC2 Degree: 44.625000"
## [1] "Average UH2 Degree: 25.000000"
## [1] "Average RL1 Degree: 1.000000"
## [1] "Average R33 Degree: 0.000000"
## [1] "Average UM1 Degree: 202.000000"
## [1] "Average P60 Degree: 0.000000"
## [1] "Average all_nodes Degree: 10.973274"
##
##
## [1] "###############Confusion Table###############"
## Target_Groups
## Source_Groups DP1 M01 P01 P20
## DP1 0.0000000000 0.0000000000 0.0006088898 0.0000000000
## M01 0.0018266694 0.0109600162 0.0008118531
## P01 0.0416074690 0.0040592653
## P20 0.0010148163
## P30
## P41
## P42
## P50
## P51
## P60
## R01
## R21
## R33
## R37
## R56
## RC1
## RC2
## RC4
## RL1
## U01
## U19
## U41
## U54
## UH2
## UL1
## UM1
## Target_Groups
## Source_Groups P30 P41 P42 P50
## DP1 0.0010148163 0.0022325959 0.0000000000 0.0000000000
## M01 0.0012177796 0.0014207428 0.0000000000 0.0154252080
## P01 0.0182666937 0.0505378526 0.0018266694 0.0087274203
## P20 0.0192815100 0.0024355592 0.0054800081 0.0022325959
## P30 0.0010148163 0.0085244571 0.0006088898 0.0018266694
## P41 0.0259792977 0.0018266694 0.0121777958
## P42 0.0002029633 0.0010148163
## P50 0.0056829714
## P51
## P60
## R01
## R21
## R33
## R37
## R56
## RC1
## RC2
## RC4
## RL1
## U01
## U19
## U41
## U54
## UH2
## UL1
## UM1
## Target_Groups
## Source_Groups P51 P60 R01 R21
## DP1 0.0000000000 0.0000000000 0.0012177796 0.0000000000
## M01 0.0014207428 0.0000000000 0.0054800081 0.0000000000
## P01 0.0081185305 0.0000000000 0.0322711589 0.0006088898
## P20 0.0004059265 0.0000000000 0.0028414857 0.0000000000
## P30 0.0000000000 0.0000000000 0.0131926121 0.0000000000
## P41 0.0002029633 0.0000000000 0.0819971585 0.0002029633
## P42 0.0000000000 0.0000000000 0.0006088898 0.0000000000
## P50 0.0016237061 0.0000000000 0.0117718693 0.0004059265
## P51 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## P60 0.0000000000 0.0000000000 0.0000000000
## R01 0.0466815506 0.0006088898
## R21 0.0000000000
## R33
## R37
## R56
## RC1
## RC2
## RC4
## RL1
## U01
## U19
## U41
## U54
## UH2
## UL1
## UM1
## Target_Groups
## Source_Groups R33 R37 R56 RC1
## DP1 0.0000000000 0.0004059265 0.0000000000 0.0000000000
## M01 0.0000000000 0.0006088898 0.0000000000 0.0004059265
## P01 0.0000000000 0.0186726203 0.0020296326 0.0008118531
## P20 0.0000000000 0.0006088898 0.0000000000 0.0000000000
## P30 0.0000000000 0.0085244571 0.0012177796 0.0000000000
## P41 0.0000000000 0.0219200325 0.0012177796 0.0002029633
## P42 0.0000000000 0.0014207428 0.0004059265 0.0000000000
## P50 0.0000000000 0.0052770449 0.0048711183 0.0008118531
## P51 0.0000000000 0.0018266694 0.0002029633 0.0000000000
## P60 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R01 0.0000000000 0.0174548407 0.0028414857 0.0010148163
## R21 0.0000000000 0.0004059265 0.0006088898 0.0002029633
## R33 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R37 0.0060888979 0.0004059265 0.0018266694
## R56 0.0000000000 0.0000000000
## RC1 0.0000000000
## RC2
## RC4
## RL1
## U01
## U19
## U41
## U54
## UH2
## UL1
## UM1
## Target_Groups
## Source_Groups RC2 RC4 RL1 U01
## DP1 0.0000000000 0.0034503755 0.0000000000 0.0002029633
## M01 0.0032474122 0.0000000000 0.0000000000 0.0040592653
## P01 0.0062918612 0.0028414857 0.0000000000 0.0095392734
## P20 0.0002029633 0.0004059265 0.0000000000 0.0004059265
## P30 0.0002029633 0.0000000000 0.0000000000 0.0048711183
## P41 0.0028414857 0.0024355592 0.0000000000 0.0038563020
## P42 0.0004059265 0.0000000000 0.0000000000 0.0000000000
## P50 0.0004059265 0.0034503755 0.0000000000 0.0014207428
## P51 0.0000000000 0.0000000000 0.0000000000 0.0014207428
## P60 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R01 0.0030444490 0.0018266694 0.0002029633 0.0044651918
## R21 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R33 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R37 0.0002029633 0.0042622285 0.0000000000 0.0012177796
## R56 0.0000000000 0.0002029633 0.0000000000 0.0002029633
## RC1 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## RC2 0.0249644814 0.0004059265 0.0000000000 0.0032474122
## RC4 0.0010148163 0.0000000000 0.0002029633
## RL1 0.0000000000 0.0000000000
## U01 0.0008118531
## U19
## U41
## U54
## UH2
## UL1
## UM1
## Target_Groups
## Source_Groups U19 U41 U54 UH2
## DP1 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## M01 0.0024355592 0.0000000000 0.0085244571 0.0000000000
## P01 0.0062918612 0.0004059265 0.0483052567 0.0002029633
## P20 0.0036533387 0.0000000000 0.0048711183 0.0000000000
## P30 0.0024355592 0.0002029633 0.0176578039 0.0000000000
## P41 0.0042622285 0.0000000000 0.0328800487 0.0000000000
## P42 0.0004059265 0.0000000000 0.0008118531 0.0000000000
## P50 0.0024355592 0.0002029633 0.0237467018 0.0000000000
## P51 0.0000000000 0.0000000000 0.0042622285 0.0048711183
## P60 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R01 0.0030444490 0.0004059265 0.0345037548 0.0000000000
## R21 0.0000000000 0.0000000000 0.0002029633 0.0000000000
## R33 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## R37 0.0024355592 0.0000000000 0.0095392734 0.0000000000
## R56 0.0000000000 0.0000000000 0.0002029633 0.0000000000
## RC1 0.0004059265 0.0000000000 0.0002029633 0.0000000000
## RC2 0.0008118531 0.0000000000 0.0012177796 0.0000000000
## RC4 0.0014207428 0.0000000000 0.0024355592 0.0000000000
## RL1 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## U01 0.0026385224 0.0002029633 0.0172518774 0.0050740816
## U19 0.0028414857 0.0000000000 0.0032474122 0.0000000000
## U41 0.0010148163 0.0000000000 0.0000000000
## U54 0.0403896895 0.0000000000
## UH2 0.0000000000
## UL1
## UM1
## Target_Groups
## Source_Groups UL1 UM1
## DP1 0.0000000000 0.0000000000
## M01 0.0000000000 0.0004059265
## P01 0.0002029633 0.0022325959
## P20 0.0000000000 0.0000000000
## P30 0.0000000000 0.0000000000
## P41 0.0000000000 0.0012177796
## P42 0.0000000000 0.0002029633
## P50 0.0002029633 0.0010148163
## P51 0.0000000000 0.0000000000
## P60 0.0000000000 0.0000000000
## R01 0.0000000000 0.0002029633
## R21 0.0000000000 0.0000000000
## R33 0.0000000000 0.0000000000
## R37 0.0000000000 0.0004059265
## R56 0.0000000000 0.0000000000
## RC1 0.0000000000 0.0000000000
## RC2 0.0000000000 0.0000000000
## RC4 0.0000000000 0.0000000000
## RL1 0.0000000000 0.0000000000
## U01 0.0000000000 0.0000000000
## U19 0.0000000000 0.0000000000
## U41 0.0000000000 0.0000000000
## U54 0.0002029633 0.0353156079
## UH2 0.0000000000 0.0000000000
## UL1 0.0000000000 0.0000000000
## UM1 0.0000000000
##
##
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average P50 Transitivity: 0.226433"
## [1] "Average P41 Transitivity: 0.171009"
## [1] "Average P01 Transitivity: 0.203538"
## [1] "Average R37 Transitivity: 0.164108"
## [1] "Average P30 Transitivity: 0.187444"
## [1] "Average U19 Transitivity: 0.183554"
## [1] "Average U54 Transitivity: 0.192601"
## [1] "Average R01 Transitivity: 0.162702"
## [1] "Average P20 Transitivity: 0.179750"
## [1] "Average R56 Transitivity: 0.133333"
## [1] "Average R21 Transitivity: 0.111111"
## [1] "Average P42 Transitivity: 0.357381"
## [1] "Average UL1 Transitivity: 0.000000"
## [1] "Average DP1 Transitivity: 0.055556"
## [1] "Average M01 Transitivity: 0.242348"
## [1] "Average U41 Transitivity: 0.000000"
## [1] "Average P51 Transitivity: 0.101299"
## [1] "Average U01 Transitivity: 0.200764"
## [1] "Average RC4 Transitivity: 0.093778"
## [1] "Average RC1 Transitivity: 0.316667"
## [1] "Average RC2 Transitivity: 0.275570"
## [1] "Average UH2 Transitivity: 0.000000"
## [1] "Average RL1 Transitivity: NaN"
## [1] "Average R33 Transitivity: NaN"
## [1] "Average UM1 Transitivity: 0.198830"
## [1] "Average P60 Transitivity: NaN"
##
##
## [1] "###############Complete Triads###############"
## [1] "The number of P50 Complete Triads: 0.000000"
## [1] "The number of P41 Complete Triads: 7.000000"
## [1] "The number of P01 Complete Triads: 6.000000"
## [1] "The number of R37 Complete Triads: 0.000000"
## [1] "The number of P30 Complete Triads: 0.000000"
## [1] "The number of U19 Complete Triads: 0.000000"
## [1] "The number of U54 Complete Triads: 20.000000"
## [1] "The number of R01 Complete Triads: 2.000000"
## [1] "The number of P20 Complete Triads: 1.000000"
## [1] "The number of R56 Complete Triads: 0.000000"
## [1] "The number of R21 Complete Triads: 0.000000"
## [1] "The number of P42 Complete Triads: 0.000000"
## [1] "The number of UL1 Complete Triads: 0.000000"
## [1] "The number of DP1 Complete Triads: 0.000000"
## [1] "The number of M01 Complete Triads: 1.000000"
## [1] "The number of U41 Complete Triads: 0.000000"
## [1] "The number of P51 Complete Triads: 0.000000"
## [1] "The number of U01 Complete Triads: 1.000000"
## [1] "The number of RC4 Complete Triads: 0.000000"
## [1] "The number of RC1 Complete Triads: 0.000000"
## [1] "The number of RC2 Complete Triads: 0.000000"
## [1] "The number of UH2 Complete Triads: 0.000000"
## [1] "The number of RL1 Complete Triads: 0.000000"
## [1] "The number of R33 Complete Triads: 0.000000"
## [1] "The number of UM1 Complete Triads: 0.000000"
## [1] "The number of P60 Complete Triads: 0.000000"
- reduced network -
Question 3
# Question 3: statistics grouped by the `Act_per_proj_quant` vertex attribute.
q3_stats <- get_stats(g, attr = "Act_per_proj_quant")
## [1] "Q3 Does variety in funding sources influence collaboration behavior?"
- original network -
## [1] "###############assortativity###############"
## [1] "assortativity: -0.0360225782131399"
##
##
## [1] "###############Degree###############"
## [1] "Average 3 Degree: 18.588235"
## [1] "Average 2 Degree: 11.263636"
## [1] "Average 5 Degree: 8.666667"
## [1] "Average 4 Degree: 12.750000"
## [1] "Average 1 Degree: 9.251337"
## [1] "Average all_nodes Degree: 10.973274"
##
##
## [1] "###############Confusion Table###############"
## Target_Groups
## Source_Groups 1 2 3 4 5
## 1 0.037548204 0.110817942 0.023543739 0.102293485 0.039374873
## 2 0.135782423 0.060888979 0.208443272 0.102699411
## 3 0.003247412 0.025776334 0.011568906
## 4 0.032271159 0.085447534
## 5 0.020296326
##
##
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average 3 Transitivity: 0.151307"
## [1] "Average 2 Transitivity: 0.182789"
## [1] "Average 5 Transitivity: 0.246930"
## [1] "Average 4 Transitivity: 0.190784"
## [1] "Average 1 Transitivity: 0.128057"
##
##
## [1] "###############Complete Triads###############"
## [1] "The number of 3 Complete Triads: 1.000000"
## [1] "The number of 2 Complete Triads: 43.000000"
## [1] "The number of 5 Complete Triads: 1.000000"
## [1] "The number of 4 Complete Triads: 4.000000"
## [1] "The number of 1 Complete Triads: 5.000000"
- reduced network -
Question 4
# Question 4: statistics grouped by the `discipline_classification` attribute.
q4_stats <- get_stats(g, attr = "discipline_classification")
## [1] "Q4 Does the scientific discipline influence collaboration behavior?"
- original network -
## [1] "###############assortativity###############"
## [1] "assortativity: 0.203262568923209"
##
##
## [1] "###############Degree###############"
## [1] "Average AGRI Degree: 8.735849"
## [1] "Average CHEM Degree: 16.690909"
## [1] "Average PHAR Degree: 14.170732"
## [1] "Average IMMU Degree: 10.343066"
## [1] "Average MULT Degree: 14.274648"
## [1] "Average NEUR Degree: 5.623853"
## [1] "Average MEDI Degree: 13.173077"
## [1] "Average CENG Degree: 12.935484"
## [1] "Average NULL. Degree: 1.000000"
## [1] "Average BIOC Degree: 8.470588"
## [1] "Average COMP Degree: 3.000000"
## [1] "Average PHYS Degree: 21.600000"
## [1] "Average ENGI Degree: 0.000000"
## [1] "Average all_nodes Degree: 10.973274"
##
##
## [1] "###############Confusion Table###############"
## Target_Groups
## Source_Groups AGRI BIOC CENG CHEM
## AGRI 0.0367363507 0.0310533793 0.0107570530 0.0024355592
## BIOC 0.0117718693 0.0089303836 0.0144103917
## CENG 0.0152222448 0.0487111833
## CHEM 0.0288207834
## COMP
## ENGI
## IMMU
## MEDI
## MULT
## NEUR
## NULL.
## PHAR
## PHYS
## Target_Groups
## Source_Groups COMP ENGI IMMU MEDI
## AGRI 0.0000000000 0.0000000000 0.0426222854 0.0345037548
## BIOC 0.0000000000 0.0000000000 0.0172518774 0.0028414857
## CENG 0.0000000000 0.0000000000 0.0107570530 0.0038563020
## CHEM 0.0004059265 0.0000000000 0.0178607672 0.0028414857
## COMP 0.0000000000 0.0000000000 0.0000000000 0.0006088898
## ENGI 0.0000000000 0.0000000000 0.0000000000
## IMMU 0.0444489547 0.0184696570
## MEDI 0.0200933631
## MULT
## NEUR
## NULL.
## PHAR
## PHYS
## Target_Groups
## Source_Groups MULT NEUR NULL. PHAR
## AGRI 0.0483052567 0.0160340978 0.0002029633 0.0225289223
## BIOC 0.0263852243 0.0125837223 0.0002029633 0.0083214938
## CENG 0.0170489141 0.0024355592 0.0000000000 0.0298355998
## CHEM 0.0221229957 0.0048711183 0.0000000000 0.0123807591
## COMP 0.0002029633 0.0000000000 0.0000000000 0.0000000000
## ENGI 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## IMMU 0.0523645220 0.0089303836 0.0004059265 0.0294296732
## MEDI 0.0089303836 0.0135985387 0.0000000000 0.0123807591
## MULT 0.0870712401 0.0336919018 0.0000000000 0.0239496651
## NEUR 0.0105540897 0.0000000000 0.0111629795
## NULL. 0.0000000000 0.0006088898
## PHAR 0.0426222854
## PHYS
## Target_Groups
## Source_Groups PHYS
## AGRI 0.0000000000
## BIOC 0.0006088898
## CENG 0.0000000000
## CHEM 0.0026385224
## COMP 0.0000000000
## ENGI 0.0000000000
## IMMU 0.0006088898
## MEDI 0.0008118531
## MULT 0.0042622285
## NEUR 0.0000000000
## NULL. 0.0000000000
## PHAR 0.0000000000
## PHYS 0.0064948244
##
##
## [1] "###############Transitivity###############"
## [1] "The below statistics ignore nodes without edges"
## [1] "Average AGRI Transitivity: 0.159806"
## [1] "Average CHEM Transitivity: 0.188649"
## [1] "Average PHAR Transitivity: 0.215610"
## [1] "Average IMMU Transitivity: 0.188178"
## [1] "Average MULT Transitivity: 0.161461"
## [1] "Average NEUR Transitivity: 0.133601"
## [1] "Average MEDI Transitivity: 0.337125"
## [1] "Average CENG Transitivity: 0.186553"
## [1] "Average NULL. Transitivity: 0.500000"
## [1] "Average BIOC Transitivity: 0.141474"
## [1] "Average COMP Transitivity: 0.833333"
## [1] "Average PHYS Transitivity: 0.274074"
## [1] "Average ENGI Transitivity: NaN"
##
##
## [1] "###############Complete Triads###############"
## [1] "The number of AGRI Complete Triads: 3.000000"
## [1] "The number of CHEM Complete Triads: 4.000000"
## [1] "The number of PHAR Complete Triads: 5.000000"
## [1] "The number of IMMU Complete Triads: 15.000000"
## [1] "The number of MULT Complete Triads: 32.000000"
## [1] "The number of NEUR Complete Triads: 0.000000"
## [1] "The number of MEDI Complete Triads: 2.000000"
## [1] "The number of CENG Complete Triads: 5.000000"
## [1] "The number of NULL. Complete Triads: 0.000000"
## [1] "The number of BIOC Complete Triads: 6.000000"
## [1] "The number of COMP Complete Triads: 0.000000"
## [1] "The number of PHYS Complete Triads: 0.000000"
## [1] "The number of ENGI Complete Triads: 0.000000"
- reduced network -