Network Manipulation Script
This script contains code for computing indegree/outdegree and homophily
measures, and for visualising both networks and sub-communities within
networks.
# Attach the packages the rest of this script relies on:
#   dplyr     - data-frame verbs (distinct, mutate, select, filter, joins), %>%
#   igraph    - graph_from_data_frame(), degree(), modularity(), V()
#   tidygraph - as_tbl_graph(), activate()
#   ggraph    - network plotting
library(dplyr)
library(igraph)
library(tidygraph)
library(ggraph)

# Read in your edgelist. We will use 2021 as an example.
edges_21 <- read.csv("~/Downloads/edgeList_closeFrds_fa21.csv")

# Create a nodelist of all nominators and nominees: one row per unique ID
# that appears in the edgelist.
# NOTE(review): unlist() flattens *every* column of edges_21, so this assumes
# the edgelist contains only ID columns - confirm against the CSV.
nodes_21 <- data.frame(PID = unlist(edges_21, use.names = FALSE)) %>%
  distinct(PID, .keep_all = FALSE)

# Create the directed graph from the edgelist and nodelist, then convert it to
# a tidygraph object. g_21 is the name of our graph object.
g_21 <- graph_from_data_frame(edges_21, directed = TRUE, vertices = nodes_21)
g_21 <- as_tbl_graph(g_21)
Indegree and Outdegree
# Add each participant's total degree, in-degree and out-degree to the
# nodelist. The values are taken from g_21, which was built with nodes_21 as
# its vertex table, so they are assumed to be in the same order as the rows of
# nodes_21.
nodes_21 <- nodes_21 %>%
  mutate(
    degree = degree(g_21),
    indegree = degree(g_21, mode = "in"),
    outdegree = degree(g_21, mode = "out")
  )
Homophily
# We can attach participant information to the nodelist and use it to compute
# network statistics. Example 1: attach each person's dorm and compute network
# modularity by dorm as a measure of dorm homophily. Positive modularity means
# more within-dorm ties than expected by chance (homophily); negative
# modularity means fewer (heterophily).

# Read the PID information CSV (which contains dorm information), keep only
# the columns we need, and keep a single row per PID.
PID_21 <- read.csv(
  "/Volumes/GoogleDrive/Shared drives/Box SSNL Folder/SSNL Social Networks Study/2021-2022/Rosters/Housing Rosters/PIDinfo_long_2021-2022_autoentry_Special_Characters_Changed_wHousing.csv"
) %>%
  select(PID, dorm, DID, cohortYear) %>%
  distinct(PID, .keep_all = TRUE)

# Join the PID info onto the nodelist.
nodes_21 <- left_join(nodes_21, PID_21, by = "PID")

# The graph has to be rebuilt whenever new variables are added to the
# nodelist, so that they are carried along as vertex attributes.
g_21 <- graph_from_data_frame(
  edges_21,
  directed = TRUE,
  vertices = nodes_21
) %>%
  as_tbl_graph()

# Network modularity by dorm, using the igraph package.
# NOTE(review): nodes with no match in PID_21 have NA dorm after the join -
# confirm how modularity() treats NA membership.
modularity(g_21, membership = as.factor(V(g_21)$dorm))
## [1] 0.4290863
# Example 2: trait information can be joined in the same way and used to
# compute network statistics. Here we compute modularity by partyID.

# Read the trait data, keep only PID and partyID, and keep one row per PID.
trait_21 <- read.csv("~/Downloads/df.trait_fa21.csv") %>%
  select(PID, partyID) %>%
  distinct(PID, .keep_all = TRUE)

# Join the new information onto the nodelist.
nodes_21 <- left_join(nodes_21, trait_21, by = "PID")

# Rebuild the tidygraph object so partyID becomes a vertex attribute.
g_21 <- graph_from_data_frame(
  edges_21,
  directed = TRUE,
  vertices = nodes_21
) %>%
  as_tbl_graph()

# Modularity by partyID.
# NOTE(review): participants with no row in trait_21 have NA partyID after the
# join - confirm how modularity() treats NA membership before interpreting
# this value.
modularity(g_21, membership = as.factor(V(g_21)$partyID))
## [1] -0.08084813
Network Visualisation
# A basic visualisation of our Stanford undergrad network. Nodes can be
# coloured by any node variable; here each node is coloured by its partyID.
# Edges and points are drawn very thin/small, and the legend keys are enlarged
# so the colours remain readable.
ggraph(activate(g_21, nodes), layout = "stress") +
  geom_edge_fan(width = 0.1, color = "lightblue") +
  geom_node_point(aes(color = partyID), size = 0.05) +
  guides(color = guide_legend(override.aes = list(size = 5))) +
  coord_fixed() +
  theme_graph()

# In this example there are lots of NAs, so it may be worth removing them and
# re-running the code. Start from the nodelist (everyone in the network) and
# drop the people we have no political data for: partyID missing or blank.
nodes_21 <- nodes_21 %>%
  filter(!is.na(partyID), partyID != "")

# Keep only the edges whose nominator (PID) and nominee (nom) are both still
# in the reduced nodelist.
edges_21 <- edges_21 %>%
  filter(PID %in% nodes_21$PID, nom %in% nodes_21$PID)

# Remake the tidygraph object from the reduced edgelist and nodelist.
g_21 <- graph_from_data_frame(
  edges_21,
  directed = TRUE,
  vertices = nodes_21
) %>%
  as_tbl_graph()
# Re-visualise the network now that everyone without a partyID has been
# removed. Same plot settings as before.
ggraph(activate(g_21, nodes), layout = "stress") +
  geom_edge_fan(width = 0.1, color = "lightblue") +
  geom_node_point(aes(color = partyID), size = 0.05) +
  guides(color = guide_legend(override.aes = list(size = 5))) +
  coord_fixed() +
  theme_graph()

# Clean!
# Let's see whether removing the NAs changes our modularity measure.
modularity(g_21, membership = as.factor(V(g_21)$partyID))
## [1] 0.04418045
# Modularity has gone from -0.08084813 to 0.04418045 after removing all NAs.
# Bear in mind that the network is much smaller now that everyone who did not
# submit political data has been removed.