## convert csv files to igraph objects
## and use igraph methods to obtain degree-based subsets

  ## Start from an empty workspace: whatever is left in it at the end of the
  ## script is written out by save.image().
  rm(list = ls())
  ## igraph supplies the graph constructors and vertex/subgraph helpers
  ## used below.
  library("igraph")
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
##############################################################################
## Read in datasets
##############################################################################

  ## from csv files
  ## list.files() treats `pattern` as a regular expression. A single pattern
  ## anchored on the ".csv" extension keeps only real csv files whose name
  ## contains "fb"; two unanchored patterns ("fb", "csv") also pick up editor
  ## autosave files such as "#fb_graph_nodes.csv#".
  fb.datasets <- list.files(path = "../", pattern = "fb.*\\.csv$")
  fb.datasets
## [1] "fb_graph_edges.csv"                              
## [2] "fb_graph_matrix_deduplicated.csv"                
## [3] "fb_graph_matrix.csv"                             
## [4] "fb_graph_nodes.csv"                              
## [5] "fb_graph_resp_edges_only_edges.csv"              
## [6] "fb_graph_resp_edges_only_matrix_deduplicated.csv"
## [7] "fb_graph_resp_edges_only_matrix.csv"             
## [8] "fb_graph_resp_edges_only_nodes.csv"
##############################################################################

##############################################################################
## Obtain full ego network
##############################################################################

  ## Nodes
  ## Load the node table; its first column is taken as the node identifier
  ## and kept as a character vector.
  full.node.data <- read.csv(file = "../fb_graph_nodes.csv", header = TRUE)
  full.node.list <- as.character(full.node.data[[1]])
  ## Number of rows in the node table
  length(full.node.list)
## [1] 182998
  ## Edges
  ## Load the edge table (one row per edge, endpoints in the first two
  ## columns, edge covariates in the remaining ones).
  fb.edge.data <- read.csv(file = "../fb_graph_edges.csv", header = TRUE)

  ## Peek at the first rows
  head(fb.edge.data, n = 6L)
##                        Source   Target       Type           timestamp
## 1 _xDvXlS7mwBWvVOnR0_zm_S1REI 22222827 Undirected 2013-07-01 13:02:54
## 2 HK32l4sI-okhtszh8moPwUh4XDE 22223149 Undirected 2013-11-14 11:43:45
## 3 QEu7oYINTvMDC0zngc26yu-GA-Y 22228300 Undirected 2014-06-11 11:18:49
## 4 dAEIeSaUPvFwUNt8AWGTKVq8ar4 22228550 Undirected 2014-07-21 09:38:35
## 5 hTJkzVa-Q7LFjKAh7McMitgZO28 22228547 Undirected 2014-07-20 13:03:20
## 6 G_zYyGY3sfr8mV20u22knTw4ssE 11111019 Undirected 2013-06-17 13:10:57
##   linked_fb fb_message_count linked_fbsurvey linked_coupon
## 1         1                0               0             0
## 2         1                0               0             0
## 3         1                0               0             0
## 4         1                0               0             0
## 5         1                0               0             0
## 6         1                0               0             0
##   linked_fbmessages linked_stats mutual_friends pct_fb_mutual
## 1                 0            0             NA            NA
## 2                 0            0             NA            NA
## 3                 0            0             NA            NA
## 4                 0            0             NA            NA
## 5                 0            0             NA            NA
## 6                 0            0             NA            NA
  ## Two-column character matrix of edge endpoints (Source, Target).
  ## Each column is converted on its own: apply() on a data frame goes
  ## through as.matrix(), which format()s numeric columns to a common width
  ## and would blank-pad ids that have fewer digits than the longest one,
  ## producing vertex names that no longer match the node table.
  fb.edge.data.list <- do.call(
    cbind,
    lapply(fb.edge.data[, 1:2], as.character)
  )
  head(fb.edge.data.list)
##      Source                        Target    
## [1,] "_xDvXlS7mwBWvVOnR0_zm_S1REI" "22222827"
## [2,] "HK32l4sI-okhtszh8moPwUh4XDE" "22223149"
## [3,] "QEu7oYINTvMDC0zngc26yu-GA-Y" "22228300"
## [4,] "dAEIeSaUPvFwUNt8AWGTKVq8ar4" "22228550"
## [5,] "hTJkzVa-Q7LFjKAh7McMitgZO28" "22228547"
## [6,] "G_zYyGY3sfr8mV20u22knTw4ssE" "11111019"
  ## Undirected graph; the character ids in the matrix become vertex names.
  ## graph_from_edgelist() is the current name of the deprecated
  ## graph.edgelist().
  w1.ig <- graph_from_edgelist(fb.edge.data.list, directed = FALSE)
  w1.ig
## IGRAPH 7b7513b UN-- 182998 327741 -- 
## + attr: name (v/c)
## + edges from 7b7513b (vertex names):
## [1] _xDvXlS7mwBWvVOnR0_zm_S1REI--22222827
## [2] HK32l4sI-okhtszh8moPwUh4XDE--22223149
## [3] QEu7oYINTvMDC0zngc26yu-GA-Y--22228300
## [4] dAEIeSaUPvFwUNt8AWGTKVq8ar4--22228550
## [5] hTJkzVa-Q7LFjKAh7McMitgZO28--22228547
## [6] G_zYyGY3sfr8mV20u22knTw4ssE--11111019
## [7] G_zYyGY3sfr8mV20u22knTw4ssE--22222382
## [8] SS5L7tN-OqT6SxlzPDGEcchUYCc--11111127
## + ... omitted several edges
  ## add attributes to these vertices
  ## Vertex ids in w1.ig are assigned by igraph from the edge list, so the
  ## vertex order need not equal the row order of the node table. Look up the
  ## node-table row of every vertex by name instead of assigning by position.
  ## (The first column of full.node.data is treated as the node id, the same
  ## column full.node.list is built from.)
  node.row.idx <- match(V(w1.ig)$name, as.character(full.node.data[, 1]))
  if (anyNA(node.row.idx)) {
    stop("some vertices of w1.ig have no row in full.node.data",
         call. = FALSE)
  }
  V(w1.ig)$fb_age <- full.node.data$fb_age[node.row.idx]
  V(w1.ig)$chicago <- full.node.data$fb_city_chicago[node.row.idx]
  #V(w1.ig)$sex <- full.node.data$fb_sex[node.row.idx]
  V(w1.ig)$sex_male <- full.node.data$fb_sex_male[node.row.idx]
  ## The index refers to the current vertex order only; drop it so it does
  ## not end up in the workspace image saved at the end of the script.
  rm(node.row.idx)

  ## Renumber the vertices so that they are sorted by vertex name
  vnames <- V(w1.ig)$name
  ## order(order(x)) inverts the sorting permutation: element i is the
  ## position vertex i takes once the names are sorted, i.e. the new id
  ## handed to permute.vertices() for vertex i.
  vnames_ranks <- order(order(vnames))
  w1.ig <- permute.vertices(w1.ig, vnames_ranks)

  ## ## Order vertices by vname 
  ## (FOLLOWING CODE WILL NEED TO BE UPDATED TO INCLUDE 1111, 2222 AT BEGINNING,
  ## EVERYONE ELSE FOLLOWING)
  ## vnames <- V(w2.ig)$name
  ## vnames_ranks <- vector(length=length(vnames))
  ## vnames_ranks[order(vnames)] <- 1:length(vnames)
  ## w2.ig <- permute.vertices(w2.ig, vnames_ranks)

  ## Degree-based subsets
  ## Store each vertex's degree in the full graph as a vertex attribute, then
  ## keep the vertices whose full-graph degree reaches the threshold. The
  ## threshold is applied once to the full-graph degree: the `degree`
  ## attribute carried into each subgraph is still the full-graph value, and
  ## degrees inside a subgraph may be lower.
  V(w1.ig)$degree <- degree(w1.ig)
  w1.ig.deg.greq.2 <- induced.subgraph(w1.ig, which(V(w1.ig)$degree >= 2))
  w1.ig.deg.greq.3 <- induced.subgraph(w1.ig, which(V(w1.ig)$degree >= 3))
  w1.ig.deg.greq.4 <- induced.subgraph(w1.ig, which(V(w1.ig)$degree >= 4))
  
  ## Communication network: edges flagged with linked_fbmessages == 1
  ## which() returns the row numbers of matching edges and drops NA flags.
  messnet.edge.ids <- which(fb.edge.data[["linked_fbmessages"]] == 1)
  messnet.edge.data <- fb.edge.data[messnet.edge.ids, ]
  ## First rows and dimensions of the selected edge table
  head(messnet.edge.data)
  dim(messnet.edge.data)
##                          Source   Target       Type           timestamp
## 9   _LGsgakDwIYGNb2tem3TMXNYd9c 11111036 Undirected 2013-06-21 12:55:40
## 30  lPSN9aNUZXiFOuAYW1hbR6yeqcQ 11111309 Undirected 2013-12-13 14:00:54
## 50  2yc3ncZbSPWCTDgndZZuxQJAg20 11111058 Undirected 2013-07-22 11:20:34
## 91  XacajsJXDCeh5ThT1El9GHPaGyA 11111304 Undirected 2013-10-21 12:55:41
## 99  U6aC-BNosTzhoAnnS5vAJ3U_uwE 22223432 Undirected 2013-07-31 11:46:49
## 114 A4x72Qq9ag2LLDxX5Pvqj19usfE 22222957 Undirected 2013-11-06 09:20:17
##     linked_fb fb_message_count linked_fbsurvey linked_coupon
## 9           1                2               0             0
## 30          1                2               0             0
## 50          1                2               0             0
## 91          1               57               0             0
## 99          1                2               0             0
## 114         1                2               0             0
##     linked_fbmessages linked_stats mutual_friends pct_fb_mutual
## 9                   1            0             NA            NA
## 30                  1            0             NA            NA
## 50                  1            0             NA            NA
## 91                  1            0             NA            NA
## 99                  1            0             NA            NA
## 114                 1            0             NA            NA
## [1] 12493    12
  ## Two-column character matrix of message-edge endpoints (Source, Target).
  ## Each column is converted on its own: apply() on a data frame goes
  ## through as.matrix(), which format()s numeric columns to a common width
  ## and would blank-pad ids that have fewer digits than the longest one.
  messnet.edge.data.list <- do.call(
    cbind,
    lapply(messnet.edge.data[, 1:2], as.character)
  )
  ## Keep the original edge-table row numbers as row names.
  rownames(messnet.edge.data.list) <- rownames(messnet.edge.data)
  head(messnet.edge.data.list)
##     Source                        Target    
## 9   "_LGsgakDwIYGNb2tem3TMXNYd9c" "11111036"
## 30  "lPSN9aNUZXiFOuAYW1hbR6yeqcQ" "11111309"
## 50  "2yc3ncZbSPWCTDgndZZuxQJAg20" "11111058"
## 91  "XacajsJXDCeh5ThT1El9GHPaGyA" "11111304"
## 99  "U6aC-BNosTzhoAnnS5vAJ3U_uwE" "22223432"
## 114 "A4x72Qq9ag2LLDxX5Pvqj19usfE" "22222957"
  ## Undirected graph containing only the endpoints of message edges.
  ## graph_from_edgelist() is the current name of the deprecated
  ## graph.edgelist().
  w1.messnet <- graph_from_edgelist(messnet.edge.data.list, directed = FALSE)

  ## Persist every object currently in the workspace (tables, edge
  ## matrices, graphs and subsets) to a single RData file.
  save.image(file = "w1_igraph_subsets.RData")