Introduction
The goal of this analysis is to understand talent flows between firms. One important way firms are interconnected is through the talent (employees) that migrates between them. Using a snapshot of LinkedIn data, we have assembled a data set that captures this network of talent flows. Each node in the graph is an S&P 500 firm from 2018, with node attributes such as industry and size. Each directed edge represents the talent flow between the two firms it connects: A → B means that employees have moved from the origin firm (A) to the destination firm (B). Each edge also carries edge attributes such as the number of employees who migrated.
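As a tiny illustration of this convention (the two firms A and B below are made up, not taken from the data), an edge list of this shape maps onto a directed igraph object roughly as follows:
# illustrative only: a two-firm edge list using the A -> B convention
toy_edges <- data.frame(from = c("A", "B"),
                        to = c("B", "A"),
                        migration_count = c(10, 3))
toy_graph <- igraph::graph_from_data_frame(toy_edges, directed = TRUE)
igraph::E(toy_graph)$migration_count  # migration_count is kept as an edge attribute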
# load the required libraries
library("igraph")
library("readr")
library("poweRlaw")
library("ggplot2")
library("scales")
library("cowplot")
library("psych")
library("dplyr")
library('RColorBrewer')
# load data files for analysis
= read_csv("talent_flows.csv")
df_talent_flows = read_csv("linkedin_company_metadata.csv")
df_company
# view the column names
names(df_talent_flows)
## [1] "from" "to" "migration_count"
names(df_company)
## [1] "company_id" "name" "industry" "city" "country"
## [6] "founded" "hq" "overview" "emp_count"
# view sample values
head(df_talent_flows)
head(df_company)
# create a dataframe called df_edges, where the first two columns are from and to.
# this will make it easier to work with igraph
df_edges = df_talent_flows
# create an igraph graph object using the df_edges dataframe
igrapgh = graph_from_data_frame(df_edges, directed = TRUE)
# print the igraph object
print(igrapgh, e=TRUE, v=TRUE)
## IGRAPH 0ce9a85 DN-- 473 81114 --
## + attr: name (v/c), migration_count (e/n)
## + edges from 0ce9a85 (vertex names):
## [1] at&t ->oracle
## [2] colgate-palmolive ->nike
## [3] agilent-technologies->stryker
## [4] ebay ->expedia
## [5] comcast ->republic-services-inc
## [6] aon ->aig
## [7] costco-wholesale ->apple
## [8] facebook ->cisco
## + ... omitted several edges
# view vertex
V(igrapgh)
## + 473/473 vertices, named, from 0ce9a85:
## [1] at&t colgate-palmolive
## [3] agilent-technologies ebay
## [5] comcast aon
## [7] costco-wholesale facebook
## [9] john-deere ross-stores
## [11] american-express target
## [13] cme-group jpmorgan-chase
## [15] united-airlines the-home-depot
## [17] xerox wellsfargo
## [19] boeing jefferies
## + ... omitted several vertices
# vertex size
gorder(igrapgh)
## [1] 473
# view edges
E(igrapgh)
## + 81114/81114 edges from 0ce9a85 (vertex names):
## [1] at&t ->oracle
## [2] colgate-palmolive ->nike
## [3] agilent-technologies->stryker
## [4] ebay ->expedia
## [5] comcast ->republic-services-inc
## [6] aon ->aig
## [7] costco-wholesale ->apple
## [8] facebook ->cisco
## [9] john-deere ->ge
## [10] ross-stores ->walmart
## + ... omitted several edges
# edge size
gsize(igrapgh)
## [1] 81114
# is the graph directed?
is.directed(igrapgh)
## [1] TRUE
Calculate the in-degree and out-degree for each firm. What are the top 10 firms with the highest in-degree? What are the 10 firms with the highest out-degree? Describe in your own words what these metrics mean.
# calculate total nodes
a1 <- df_edges %>%
  distinct(from) %>%
  rename(label = "from")
b1 <- df_edges %>%
  distinct(to) %>%
  rename(label = "to")
df_edges_nodes_all <- full_join(a1, b1, by = "label")
df_edges_nodes_all <- as.data.frame(df_edges_nodes_all)
# create in-link weights for each from-to combination
df_edges_in_deg <- df_edges %>%
  group_by(to, from) %>%
  summarise(weight = n()) %>%
  ungroup()
# create out-link weights for each from-to combination
df_edges_out_deg <- df_edges %>%
  group_by(from, to) %>%
  summarise(weight = n()) %>%
  ungroup()
# sum the weight count to calculate total In degree
df_edges_in_deg <-
  aggregate(df_edges_in_deg$weight,
            by = list(name = df_edges_in_deg$to),
            FUN = sum)
# sum the weight count to calculate total Out degree
df_edges_out_deg <-
  aggregate(df_edges_out_deg$weight,
            by = list(name = df_edges_out_deg$from),
            FUN = sum)
# combine in-degree with nodes list
df_edges_in_deg_data <- df_edges_in_deg %>%
  right_join(df_edges_nodes_all, by = c("name" = "label"))
df_edges_in_deg_data <- as.data.frame(df_edges_in_deg_data)
# combine out-degree with nodes list
df_edges_out_deg_data <- df_edges_out_deg %>%
  right_join(df_edges_nodes_all, by = c("name" = "label"))
df_edges_out_deg_data <- as.data.frame(df_edges_out_deg_data)
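As a sanity check, the same unweighted in- and out-degrees can be read directly off the igraph object built earlier; a minimal sketch, assuming each (from, to) pair appears at most once in df_edges so that counting edges equals counting partner firms:
# cross-check the dplyr-based counts with igraph's built-in degree()
in_deg_check <- degree(igrapgh, mode = "in")
out_deg_check <- degree(igrapgh, mode = "out")
sort(in_deg_check, decreasing = TRUE)[1:10]   # top 10 firms by in-degree
sort(out_deg_check, decreasing = TRUE)[1:10]  # top 10 firms by out-degree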
# create data frame of top 10 firms with highest in-degree
top10_in_deg_firms <-
  as.data.frame(head(df_edges_in_deg_data[order(df_edges_in_deg_data$x,
                                                decreasing = T),], n = 10))
top10_in_deg_firms_list <- top10_in_deg_firms %>%
  inner_join(df_company, by = c("name" = "company_id")) %>%
  select(name.y, industry, city, country, emp_count) %>%
  rename("company name" = "name.y", "employee count" = "emp_count")
# create data frame of top 10 firms with highest out-degree
top10_out_deg_firms <-
  as.data.frame(head(df_edges_out_deg_data[order(df_edges_out_deg_data$x,
                                                 decreasing = T),], n = 10))
top10_out_deg_firms_list <- top10_out_deg_firms %>%
  inner_join(df_company, by = c("name" = "company_id")) %>%
  select(name.y, industry, city, country, emp_count) %>%
  rename("company name" = "name.y", "employee count" = "emp_count")
print("The top 10 firms with the highest in-degree are : ")
## [1] "The top 10 firms with the highest in-degree are : "
print(top10_in_deg_firms_list)
## company name industry
## 1 IBM information technology and services
## 2 Accenture information technology and services
## 3 Hewlett Packard Enterprise information technology and services
## 4 AT&T telecommunications
## 5 Amazon internet
## 6 Bank of America banking
## 7 Wells Fargo financial services
## 8 JPMorgan Chase & Co. financial services
## 9 Microsoft computer software
## 10 Citi financial services
## city country employee count
## 1 Armonk, New York United States 771986
## 2 Dublin 2 Ireland 480235
## 3 Palo Alto United States 506236
## 4 Dallas United States 321692
## 5 Seattle United States 139917
## 6 Charlotte United States 326301
## 7 San Francisco United States 302434
## 8 New York United States 270061
## 9 Redmond United States 302297
## 10 New York United States 323488
print("The top 10 firms with the highest out-degree are : ")
## [1] "The top 10 firms with the highest out-degree are : "
print(top10_out_deg_firms_list)
## company name industry
## 1 IBM information technology and services
## 2 AT&T telecommunications
## 3 Hewlett Packard Enterprise information technology and services
## 4 JPMorgan Chase & Co. financial services
## 5 Bank of America banking
## 6 Accenture information technology and services
## 7 GE electrical/electronic manufacturing
## 8 Wells Fargo financial services
## 9 Citi financial services
## 10 Target retail
## city country employee count
## 1 Armonk, New York United States 771986
## 2 Dallas United States 321692
## 3 Palo Alto United States 506236
## 4 New York United States 270061
## 5 Charlotte United States 326301
## 6 Dublin 2 Ireland 480235
## 7 Boston United States 142190
## 8 San Francisco United States 302434
## 9 New York United States 323488
## 10 Minneapolis United States 233172
You’ll notice that the firms with the highest degree are biased towards larger firms. Explain why we might expect this type of correlation.
Answer
Companies tend to hire employees from larger organizations because those organizations simply have more people who can move. A firm with hundreds of thousands of employees generates, and absorbs, transitions involving many more distinct firms than a small firm does, which mechanically inflates both its in-degree and out-degree. The regressions below bear this out: employee count alone explains roughly half of the variation in both in-degree and out-degree.
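A quick, informal way to check this intuition before the formal regressions below is to correlate degree with firm size; a sketch using the data frames built above:
# correlation between in-degree and employee count (a clearly positive value supports the size story)
size_vs_degree <- df_edges_in_deg_data %>%
  inner_join(df_company, by = c("name" = "company_id"))
cor(size_vs_degree$x, size_vs_degree$emp_count, use = "complete.obs")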
# create data set for linear regression for in-degree data
df_edges_in_deg_data_reg <- df_edges_in_deg_data %>%
  inner_join(df_company, by = c("name" = "company_id")) %>%
  select(x, emp_count) %>%
  rename("in degree" = "x", "employee count" = "emp_count")
# create data set for linear regression for out-degree data
df_edges_out_deg_data_reg <- df_edges_out_deg_data %>%
  inner_join(df_company, by = c("name" = "company_id")) %>%
  select(x, emp_count) %>%
  rename("out degree" = "x", "employee count" = "emp_count")
# create a linear model for in-degree data
lr_in_deg = lm(df_edges_in_deg_data_reg$`in degree` ~ df_edges_in_deg_data_reg$`employee count`,
               data = df_edges_in_deg_data_reg)
options(scipen=999)
summary(lr_in_deg)
##
## Call:
## lm(formula = df_edges_in_deg_data_reg$`in degree` ~ df_edges_in_deg_data_reg$`employee count`,
## data = df_edges_in_deg_data_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -456.46 -42.82 -3.25 47.08 143.81
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 136.62554978 3.40651346 40.11
## df_edges_in_deg_data_reg$`employee count` 0.00095446 0.00004365 21.87
## Pr(>|t|)
## (Intercept) <0.0000000000000002 ***
## df_edges_in_deg_data_reg$`employee count` <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 65.47 on 471 degrees of freedom
## Multiple R-squared: 0.5038, Adjusted R-squared: 0.5027
## F-statistic: 478.2 on 1 and 471 DF, p-value: < 0.00000000000000022
# create a linear model for out-degree data
lr_out_deg = lm(df_edges_out_deg_data_reg$`out degree` ~ df_edges_out_deg_data_reg$`employee count`,
                data = df_edges_out_deg_data_reg)
options(scipen=999)
summary(lr_out_deg)
##
## Call:
## lm(formula = df_edges_out_deg_data_reg$`out degree` ~ df_edges_out_deg_data_reg$`employee count`,
## data = df_edges_out_deg_data_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -507.64 -49.69 -2.16 56.89 134.65
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 132.94148993 3.68439167 36.08
## df_edges_out_deg_data_reg$`employee count` 0.00105532 0.00004721 22.36
## Pr(>|t|)
## (Intercept) <0.0000000000000002 ***
## df_edges_out_deg_data_reg$`employee count` <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 70.81 on 471 degrees of freedom
## Multiple R-squared: 0.5148, Adjusted R-squared: 0.5138
## F-statistic: 499.8 on 1 and 471 DF, p-value: < 0.00000000000000022
Summary
Both the in-degree and the out-degree regressions have p-values far below 0.05, so the relationship between degree and employee count is statistically significant at the 95% confidence level. For every additional employee, in-degree increases by about 0.00095 and out-degree by about 0.00106 on average, i.e., roughly one additional partner firm per 1,000 employees in either direction. The R-squared values (about 0.50 and 0.51) indicate that firm size alone explains roughly half of the variation in degree, consistent with the size bias noted above.
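To make the slopes concrete, a rough back-of-the-envelope prediction from the fitted coefficients (the 100,000-employee firm is purely illustrative):
# predicted degree for a hypothetical firm with 100,000 employees
coef(lr_in_deg)[1] + coef(lr_in_deg)[2] * 100000   # about 136.6 + 0.00095 * 100000, i.e. roughly 232
coef(lr_out_deg)[1] + coef(lr_out_deg)[2] * 100000 # about 132.9 + 0.00106 * 100000, i.e. roughly 238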
Weight is migration count (migration_count) divided by the total number of employees (emp_count) of the starting firm. For example, if firm A has a total of 50 employees, and 10 workers went from firm A to firm B, the weight of the edge A → B would be 0.2.
# create a new dataframe to calculate the weighted edge
df_edges_weighted = df_edges %>%
  inner_join(df_company, by = c("from" = "company_id")) %>%
  select(from, to, migration_count, emp_count) %>%
  mutate(weight = migration_count / emp_count, .after = to)
# view weighted graph
df_edges_weighted
# extract only the first three columns (from, to, weight) to create a directed igraph
df_edges_weighted = df_edges_weighted[1:3]
# create an igraph object using the df_edges_weighted dataframe
igrapgh_weighted = graph_from_data_frame(df_edges_weighted, directed = TRUE)
# print the igraph object
print(igrapgh_weighted, e=TRUE, v=TRUE)
## IGRAPH d4dab9f DNW- 473 81114 --
## + attr: name (v/c), weight (e/n)
## + edges from d4dab9f (vertex names):
## [1] at&t ->oracle
## [2] colgate-palmolive ->nike
## [3] agilent-technologies->stryker
## [4] ebay ->expedia
## [5] comcast ->republic-services-inc
## [6] aon ->aig
## [7] costco-wholesale ->apple
## [8] facebook ->cisco
## + ... omitted several edges
# view vertex
V(igrapgh_weighted)
## + 473/473 vertices, named, from d4dab9f:
## [1] at&t colgate-palmolive
## [3] agilent-technologies ebay
## [5] comcast aon
## [7] costco-wholesale facebook
## [9] john-deere ross-stores
## [11] american-express target
## [13] cme-group jpmorgan-chase
## [15] united-airlines the-home-depot
## [17] xerox wellsfargo
## [19] boeing jefferies
## + ... omitted several vertices
# vertex size
gorder(igrapgh_weighted)
## [1] 473
# view edges
E(igrapgh_weighted)
## + 81114/81114 edges from d4dab9f (vertex names):
## [1] at&t ->oracle
## [2] colgate-palmolive ->nike
## [3] agilent-technologies->stryker
## [4] ebay ->expedia
## [5] comcast ->republic-services-inc
## [6] aon ->aig
## [7] costco-wholesale ->apple
## [8] facebook ->cisco
## [9] john-deere ->ge
## [10] ross-stores ->walmart
## + ... omitted several edges
# edge size
gsize(igrapgh_weighted)
## [1] 81114
# is the graph directed?
is.directed(igrapgh_weighted)
## [1] TRUE
# take top 10 edges by weights
top10_df_edges_weighted <-
  as.data.frame(head(df_edges_weighted[order(df_edges_weighted$weight,
                                             decreasing = T),], n = 10))
# create igraph
igrapgh_top10_df_edges_weighted =
  graph_from_data_frame(top10_df_edges_weighted, directed = TRUE)
# plot the graph
E(igrapgh_top10_df_edges_weighted)$width <- E(igrapgh_top10_df_edges_weighted)$weight
plot(igrapgh_top10_df_edges_weighted,
layout = layout.kamada.kawai,
edge.arrow.size = .2,
edge.color="#CB4335",
vertex.color="#F8C471",
vertex.label.color="#17202A",
vertex.label.cex=1,
edge.label = round(E(igrapgh_top10_df_edges_weighted)$weight,2),
edge.width = 1)
Summary
Based on the edge weights, the heaviest flows appear between branches or subsidiaries of the same parent company. For example, HP → Hewlett Packard Enterprise has the highest edge weight of all: HP is the printer and PC arm of the company, while Hewlett Packard Enterprise sells enterprise products and services.
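To make the plotted edges easier to read, the top weighted edges can also be listed with the companies' display names; a sketch, assuming the from/to identifiers match company_id in df_company:
# top 10 weighted edges with readable company names
top10_df_edges_weighted %>%
  inner_join(df_company, by = c("from" = "company_id")) %>%
  inner_join(df_company, by = c("to" = "company_id"), suffix = c("_from", "_to")) %>%
  select(from_name = name_from, to_name = name_to, weight)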
In the random surfer model, the probability that a random user lands on a web page equals the sum of the probabilities of arriving at that page through each of its inbound links. The model also assumes that the next link clicked is chosen at random, without regard to the quality of the landing page. The likelihood that a surfer ends up on a given page is therefore captured by its PageRank: the higher the PageRank, the more likely the random surfer is to land there, which generally reflects more (or more heavily used) inbound links. In our graph, the weight of the edge A → B measures how strongly A "sends" the surfer to B; the higher the weight, the more of A's probability mass flows to B, and the higher B's PageRank. Edge weights therefore directly shape the PageRank of each node.
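A minimal sketch on a made-up three-firm graph (hypothetical firms X, Y and Z) illustrates how shifting edge weight toward one destination raises its PageRank:
# toy example: X sends talent to both Y and Z, but far more heavily to Y
toy <- graph_from_data_frame(
  data.frame(from = c("X", "X", "Y", "Z"),
             to = c("Y", "Z", "X", "X"),
             weight = c(0.9, 0.1, 0.5, 0.5)),
  directed = TRUE)
page_rank(toy)$vector               # uses the weight edge attribute: Y outranks Z
page_rank(toy, weights = NA)$vector # ignores weights: Y and Z tie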
# create an igraph from the weighted dataframe df_edges_weighted
df_edges_weighted_igraph = graph_from_data_frame(df_edges_weighted, directed = TRUE)
# capture the weighted pagerank (weights come from the weight edge attribute)
pagerank_weighted = page_rank(df_edges_weighted_igraph,
                              weights = E(df_edges_weighted_igraph)$weight)$vector
# capture unweighted pagerank
pagerank_unweighted = page_rank(df_edges_weighted_igraph, weights = NA)$vector
# save the weighted and unweighted pagerank as a dataframe for sorting
pagerank_weighted_df = as.data.frame(pagerank_weighted)
pagerank_unweighted_df = as.data.frame(pagerank_unweighted)
# select top 10 node by weighted pagerank
pagerank_weighted_df_top10 = pagerank_weighted_df %>%
  arrange(desc(pagerank_weighted)) %>%
  top_n(10)
# select top 10 node by unweighted pagerank
pagerank_unweighted_df_top10 = pagerank_unweighted_df %>%
  arrange(desc(pagerank_unweighted)) %>%
  top_n(10)
print("Top 10 nodes with the highest weighted PageRank")
## [1] "Top 10 nodes with the highest weighted PageRank"
pagerank_weighted_df_top10
print("Top 10 nodes with the highest unweighted PageRank")
## [1] "Top 10 nodes with the highest unweighted PageRank"
pagerank_unweighted_df_top10
# create chart attributes for visualizations
<- "#73C6B6"
chartcolor <- theme_bw() +
mychartattributes theme(text = element_text(size=10),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(color = "gray"),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank()
)
# print weighted pagerank histogram using ggplot() function
weighted_hist <- ggplot(pagerank_weighted_df, aes(x = pagerank_weighted)) +
  geom_histogram(binwidth = 0.001, fill = chartcolor, color = "white") +
  stat_bin(binwidth = 0.001,
           aes(y = ..count.., label = ..count..),
           geom = "text", vjust = -.5) +
  labs(title = "Frequency of weighted pagerank",
       caption = "Source: LinkedIn data",
       x = "Weighted PageRank",
       y = "Frequency") +
  mychartattributes +
  scale_fill_gradient("Count") +
  scale_y_continuous(breaks = c(0, 50, 100, 150, 200, 250, 300))
# print unweighted pagerank histogram using ggplot() function
unweighted_hist <- ggplot(pagerank_unweighted_df, aes(x = pagerank_unweighted)) +
  geom_histogram(binwidth = 0.001, fill = chartcolor, color = "white") +
  stat_bin(binwidth = 0.001,
           aes(y = ..count.., label = ..count..),
           geom = "text", vjust = -.5) +
  labs(title = "Frequency of unweighted pagerank",
       caption = "Source: LinkedIn data",
       x = "Unweighted PageRank",
       y = "Frequency") +
  mychartattributes +
  scale_fill_gradient("Count") +
  scale_y_continuous(breaks = c(0, 50, 100, 150, 200, 250, 300))
plot_grid(weighted_hist,unweighted_hist)
Summary
The weighted PageRank histogram shows a distinct spike at low PageRank values: most companies receive very little of the weighted flow, while a small number of firms capture most of it. The unweighted PageRank histogram is spread more evenly, with relatively more companies sitting above the lowest PageRank bin. In other words, normalizing migration counts by the size of the origin firm concentrates PageRank in fewer firms than treating every edge equally.
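One way to quantify how much the weighting reshuffles the ranking is to compare the two PageRank vectors directly; a sketch using the vectors computed above:
# rank correlation between weighted and unweighted PageRank, and overlap of their top 10 lists
cor(pagerank_weighted, pagerank_unweighted, method = "spearman")
length(intersect(names(sort(pagerank_weighted, decreasing = TRUE))[1:10],
                 names(sort(pagerank_unweighted, decreasing = TRUE))[1:10]))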
community_cluster <- cluster_walktrap(
  df_edges_weighted_igraph,
  weights = E(df_edges_weighted_igraph)$weight,
  steps = 4,
  merges = TRUE,
  modularity = TRUE,
  membership = TRUE
)
print(paste("Number of communities observed:",length(community_cluster)))
## [1] "Number of communities observed: 10"
# plot the community cluster
plot(community_cluster,df_edges_weighted_igraph,
layout = layout_with_fr, edge.arrow.size = .1,
edge.color="#85929E",
vertex.color="#F8C471",
vertex.label="",
edge.width = 1,
vertex.size = 7)
community_members <- membership(community_cluster)
community_members_df = data.frame(as_ids(V(df_edges_weighted_igraph)),
                                  community_cluster$membership)
names(community_members_df) = c("company_id", "community_cluster")
community_members_list <- community_members_df %>%
  inner_join(df_company, by = "company_id") %>%
  select(name, community_cluster, industry, emp_count) %>%
  arrange(community_cluster)
community_members_list_1 <- community_members_list %>% filter(community_cluster == 1)
community_members_list_2 <- community_members_list %>% filter(community_cluster == 2)
community_members_list_3 <- community_members_list %>% filter(community_cluster == 3)
community_members_list_4 <- community_members_list %>% filter(community_cluster == 4)
community_members_list_5 <- community_members_list %>% filter(community_cluster == 5)
community_members_list_6 <- community_members_list %>% filter(community_cluster == 6)
community_members_list_7 <- community_members_list %>% filter(community_cluster == 7)
community_members_list_8 <- community_members_list %>% filter(community_cluster == 8)
community_members_list_9 <- community_members_list %>% filter(community_cluster == 9)
community_members_list_10 <- community_members_list %>% filter(community_cluster == 10)
#1
plot1 <- ggplot(community_members_list_1, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#2
plot2 <- ggplot(community_members_list_2, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#3
plot3 <- ggplot(community_members_list_3, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#4
plot4 <- ggplot(community_members_list_4, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#5
plot5 <- ggplot(community_members_list_5, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#6
plot6 <- ggplot(community_members_list_6, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#7
plot7 <- ggplot(community_members_list_7, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#8
plot8 <- ggplot(community_members_list_8, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#9
plot9 <- ggplot(community_members_list_9, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
#10
plot10 <- ggplot(community_members_list_10, aes(x = industry, y = emp_count)) +
  geom_bar(stat = "identity", fill = chartcolor) +
  labs(x = "Industry", y = "Employee Count") +
  mychartattributes +
  scale_y_continuous(labels = scales::comma) +
  coord_flip()
plot1
plot2
plot3
plot4
plot5
plot6
plot7
plot8
plot9
plot10
Summary
Companies in each community tend to fall into a related group of industries. Communities that span more than one industry are typically dominated by one or two sectors in terms of total employee head count, which is not surprising; for example, information technology and services accounts for the largest employee count in group 1.
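This claim can be checked by tabulating firms and head count per industry within each community; a sketch using the membership table built above (assuming dplyr >= 1.0 for the .groups argument):
# firms and total head count per industry within each community
community_members_list %>%
  group_by(community_cluster, industry) %>%
  summarise(firms = n(), total_emp = sum(emp_count), .groups = "drop") %>%
  arrange(community_cluster, desc(total_emp))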
Assortative mixing (also called homophily or assortment) is the tendency of nodes in a network to connect to other nodes that are similar in nature. In the degree-based version, a network is assortative if nodes with many connections tend to connect to other nodes that also have many connections. The assortativity coefficient is the standard measure of this tendency.
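As a small illustration of degree (dis)assortativity, independent of our data, a star network is perfectly disassortative because its only connections join the high-degree hub to degree-one leaves:
# a 10-node star: every edge joins the hub to a degree-1 leaf, so degree assortativity is -1
assortativity_degree(make_star(10, mode = "undirected"))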
# create an igraph to understand the network homophily using df_edges and df_company datasets
igraph_homophily = graph_from_data_frame(df_edges, vertices = df_company, directed = TRUE)
# since we need to understand the homophily of the network by calculating
# the assortative coefficient of industry, we need to assign the industry to the nodes
V(igraph_homophily)$industry = df_company$industry
# calculate the assortativity coefficient of industry affiliation of the companies
assortativity_coefficient = assortativity.nominal(igraph_homophily,
                                                  types = as.factor(V(igraph_homophily)$industry),
                                                  directed = TRUE)
# lets find out the assortativity degree of the network
assortativity_deg = assortativity.degree(igraph_homophily, directed = TRUE)
# print result
print(paste("The level of assortative mixing for industry in the network is: ",
round(assortativity_coefficient,6)))
## [1] "The level of assortative mixing for industry in the network is: 0.030048"
print(paste("The Assortativity Degree of the network is: ",
round(assortativity_deg,6)))
## [1] "The Assortativity Degree of the network is: -0.198701"
Summary
The assortativity coefficient for industry in the talent-flow network is positive (about 0.03), so connected vertices do exhibit some homophily: employees who move from one company to another tend to stay within a similar industry, or put differently, companies in similar industries tend to be more connected in the network. This is consistent with the community detection analysis in the earlier section, although the coefficient is small, so the effect is weak.
The degree assortativity, by contrast, is negative (about -0.20), which means highly connected companies do not preferentially connect with other highly connected companies; high-degree firms also exchange talent with many lower-degree firms.
We do not observe disassortative mixing by industry in this network: if there were, the industry assortativity coefficient would be negative, whereas in our case it is positive.