## convert csv files to igraph objects
## and use igraph methods to obtain degree-based subsets
## Start from an empty global environment.
## NOTE(review): presumably this is done so that the save.image() call at the
## end of the script stores only objects created here -- confirm before
## removing.
rm(list = ls())
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
##############################################################################
## Read in datasets
##############################################################################
## from csv files
## List the Facebook csv exports in the parent directory.
## One anchored regular expression is used so that only file names that
## contain "fb" and end in ".csv" are returned. The unanchored patterns "fb"
## and "csv" also matched editor autosave files such as "#fb_graph_nodes.csv#".
fb.datasets <- list.files(path = "../", pattern = "fb.*\\.csv$")
fb.datasets
## Expected listing for the directory contents recorded previously:
## [1] "fb_graph_edges.csv"
## [2] "fb_graph_matrix_deduplicated.csv"
## [3] "fb_graph_matrix.csv"
## [4] "fb_graph_nodes.csv"
## [5] "fb_graph_resp_edges_only_edges.csv"
## [6] "fb_graph_resp_edges_only_matrix_deduplicated.csv"
## [7] "fb_graph_resp_edges_only_matrix.csv"
## [8] "fb_graph_resp_edges_only_nodes.csv"
##############################################################################
##############################################################################
## Obtain full ego network
##############################################################################
## Nodes
## Load the node table and keep its first column as a character vector.
## NOTE(review): the first column is assumed to hold the node ids -- confirm.
full.node.data <- read.csv(file = "../fb_graph_nodes.csv", header = TRUE)
full.node.list <- as.character(full.node.data[[1]])
length(full.node.list)
## [1] 182998
## Edges
## Load the edge table and preview its first rows.
fb.edge.data <- read.csv(file = "../fb_graph_edges.csv", header = TRUE)
head(fb.edge.data)
## Source Target Type timestamp
## 1 _xDvXlS7mwBWvVOnR0_zm_S1REI 22222827 Undirected 2013-07-01 13:02:54
## 2 HK32l4sI-okhtszh8moPwUh4XDE 22223149 Undirected 2013-11-14 11:43:45
## 3 QEu7oYINTvMDC0zngc26yu-GA-Y 22228300 Undirected 2014-06-11 11:18:49
## 4 dAEIeSaUPvFwUNt8AWGTKVq8ar4 22228550 Undirected 2014-07-21 09:38:35
## 5 hTJkzVa-Q7LFjKAh7McMitgZO28 22228547 Undirected 2014-07-20 13:03:20
## 6 G_zYyGY3sfr8mV20u22knTw4ssE 11111019 Undirected 2013-06-17 13:10:57
## linked_fb fb_message_count linked_fbsurvey linked_coupon
## 1 1 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 0 0
## 5 1 0 0 0
## 6 1 0 0 0
## linked_fbmessages linked_stats mutual_friends pct_fb_mutual
## 1 0 0 NA NA
## 2 0 0 NA NA
## 3 0 0 NA NA
## 4 0 0 NA NA
## 5 0 0 NA NA
## 6 0 0 NA NA
## Two-column character edge list (first two columns of the edge table).
## The matrix is built column by column with as.character(). Calling apply()
## on a data frame converts it with as.matrix() first, which format()s numeric
## columns to a common width; numeric ids with differing numbers of digits
## would then carry leading spaces and become different vertex names.
fb.edge.data.list <- do.call(cbind,
                             lapply(fb.edge.data[, 1:2], as.character))
head(fb.edge.data.list)
## Source Target
## [1,] "_xDvXlS7mwBWvVOnR0_zm_S1REI" "22222827"
## [2,] "HK32l4sI-okhtszh8moPwUh4XDE" "22223149"
## [3,] "QEu7oYINTvMDC0zngc26yu-GA-Y" "22228300"
## [4,] "dAEIeSaUPvFwUNt8AWGTKVq8ar4" "22228550"
## [5,] "hTJkzVa-Q7LFjKAh7McMitgZO28" "22228547"
## [6,] "G_zYyGY3sfr8mV20u22knTw4ssE" "11111019"
## Build the undirected full network from the character edge list; the
## entries of the edge list become the vertex names. Then print a summary.
w1.ig <- graph.edgelist(fb.edge.data.list, directed = FALSE)
w1.ig
## IGRAPH 7b7513b UN-- 182998 327741 --
## + attr: name (v/c)
## + edges from 7b7513b (vertex names):
## [1] _xDvXlS7mwBWvVOnR0_zm_S1REI--22222827
## [2] HK32l4sI-okhtszh8moPwUh4XDE--22223149
## [3] QEu7oYINTvMDC0zngc26yu-GA-Y--22228300
## [4] dAEIeSaUPvFwUNt8AWGTKVq8ar4--22228550
## [5] hTJkzVa-Q7LFjKAh7McMitgZO28--22228547
## [6] G_zYyGY3sfr8mV20u22knTw4ssE--11111019
## [7] G_zYyGY3sfr8mV20u22knTw4ssE--22222382
## [8] SS5L7tN-OqT6SxlzPDGEcchUYCc--11111127
## + ... omitted several edges
## Add node-level attributes to the vertices.
## The attribute values are looked up by vertex name instead of being copied
## by position: the vertex order of w1.ig comes from the edge list, so it
## matches the row order of the node file only by coincidence.
node.row.ids <- match(V(w1.ig)$name, full.node.list)
if (anyNA(node.row.ids)) {
  ## Unmatched vertices get NA attributes; report them rather than fail.
  warning(sum(is.na(node.row.ids)),
          " vertices have no row in the node file; their attributes are NA",
          call. = FALSE)
}
V(w1.ig)$fb_age <- full.node.data$fb_age[node.row.ids]
V(w1.ig)$chicago <- full.node.data$fb_city_chicago[node.row.ids]
#V(w1.ig)$sex <- full.node.data$fb_sex[node.row.ids]
V(w1.ig)$sex_male <- full.node.data$fb_sex_male[node.row.ids]
## Renumber the vertices so that they are sorted by vertex name.
## order(vnames) lists the current vertex ids in sorted-name order; applying
## order() to that result inverts the permutation, giving for each current
## vertex its position in the sorted order, i.e. its new vertex id.
vnames <- V(w1.ig)$name
vnames_ranks <- order(order(vnames))
w1.ig <- permute.vertices(w1.ig, vnames_ranks)
## ## Order vertices by vname
## (FOLLOWING CODE WILL NEED TO BE UPDATED TO INCLUDE 1111, 2222 AT BEGINNING,
## EVERYONE ELSE FOLLOWING)
## vnames <- V(w2.ig)$name
## vnames_ranks <- vector(length=length(vnames))
## vnames_ranks[order(vnames)] <- 1:length(vnames)
## w2.ig <- permute.vertices(w2.ig, vnames_ranks)
## Degree-based subsets of the full network.
## Each vertex's degree in the full network is stored as a vertex attribute,
## and each subset is the subgraph induced by the vertices whose stored degree
## meets the threshold. Degrees are not recomputed after subsetting, so a
## vertex kept in a subset can have fewer neighbours than the threshold
## inside that subset.
V(w1.ig)$degree <- degree(w1.ig)
w1.ig.deg.greq.2 <- induced.subgraph(w1.ig, which(V(w1.ig)$degree >= 2))
w1.ig.deg.greq.3 <- induced.subgraph(w1.ig, which(V(w1.ig)$degree >= 3))
w1.ig.deg.greq.4 <- induced.subgraph(w1.ig, which(V(w1.ig)$degree >= 4))
## Communication network: keep only the edge rows whose linked_fbmessages
## flag equals 1, then preview the rows and report the dimensions.
messnet.edge.ids <- which(fb.edge.data[["linked_fbmessages"]] == 1)
messnet.edge.data <- fb.edge.data[messnet.edge.ids, ]
head(messnet.edge.data)
dim(messnet.edge.data)
## Source Target Type timestamp
## 9 _LGsgakDwIYGNb2tem3TMXNYd9c 11111036 Undirected 2013-06-21 12:55:40
## 30 lPSN9aNUZXiFOuAYW1hbR6yeqcQ 11111309 Undirected 2013-12-13 14:00:54
## 50 2yc3ncZbSPWCTDgndZZuxQJAg20 11111058 Undirected 2013-07-22 11:20:34
## 91 XacajsJXDCeh5ThT1El9GHPaGyA 11111304 Undirected 2013-10-21 12:55:41
## 99 U6aC-BNosTzhoAnnS5vAJ3U_uwE 22223432 Undirected 2013-07-31 11:46:49
## 114 A4x72Qq9ag2LLDxX5Pvqj19usfE 22222957 Undirected 2013-11-06 09:20:17
## linked_fb fb_message_count linked_fbsurvey linked_coupon
## 9 1 2 0 0
## 30 1 2 0 0
## 50 1 2 0 0
## 91 1 57 0 0
## 99 1 2 0 0
## 114 1 2 0 0
## linked_fbmessages linked_stats mutual_friends pct_fb_mutual
## 9 1 0 NA NA
## 30 1 0 NA NA
## 50 1 0 NA NA
## 91 1 0 NA NA
## 99 1 0 NA NA
## 114 1 0 NA NA
## [1] 12493 12
## Two-column character edge list for the message network.
## The matrix is built column by column with as.character(). Calling apply()
## on a data frame converts it with as.matrix() first, which format()s numeric
## columns to a common width; numeric ids with differing numbers of digits
## would then carry leading spaces and become different vertex names.
messnet.edge.data.list <- do.call(cbind,
                                  lapply(messnet.edge.data[, 1:2],
                                         as.character))
## Keep the edge table's row labels as row names, as the apply() version did.
rownames(messnet.edge.data.list) <- rownames(messnet.edge.data)
head(messnet.edge.data.list)
## Source Target
## 9 "_LGsgakDwIYGNb2tem3TMXNYd9c" "11111036"
## 30 "lPSN9aNUZXiFOuAYW1hbR6yeqcQ" "11111309"
## 50 "2yc3ncZbSPWCTDgndZZuxQJAg20" "11111058"
## 91 "XacajsJXDCeh5ThT1El9GHPaGyA" "11111304"
## 99 "U6aC-BNosTzhoAnnS5vAJ3U_uwE" "22223432"
## 114 "A4x72Qq9ag2LLDxX5Pvqj19usfE" "22222957"
## Build the undirected message network, then write the whole workspace to
## an .RData file in the current working directory.
w1.messnet <- graph.edgelist(messnet.edge.data.list, directed = FALSE)
save.image(file = "w1_igraph_subsets.RData")