## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2

construct graph

# 建立一個空的圖
g<-graph.empty(directed = F)
# 增加node
g<-add.vertices(g, 3)
# 增加edge
g<-add.edges(g, c(c(1, 2, 
                    1, 3, 
                    2, 3, 
                    2, 1, 
                    3, 2, 
                    1, 1))) 
# 設定node的feature
V(g)$label <- c("a", "b", "c")
V(g)$color <- c('red', 'blue', 'orange')
V(g)$size <- c(20, 40, 60)
V(g)$member <- c(1, 2, 1)
# 篩選
V(g)[which(V(g)$member ==  1)]
## + 2/3 vertices, from bedfd20:
## [1] 1 3
V(g)[degree(g) > 1]
## + 3/3 vertices, from bedfd20:
## [1] 1 2 3
# 設定edge的feature
E(g)$weight <- c(2, 8, 13, 4, 11, 3)
# 內建E(g)$weight。但是沒有label項,用set_vertex_attr加入edge label
g<-set_vertex_attr(g, "name", value = V(g)$label)
E(g)
## + 6/6 edges from bedfd20 (vertex names):
## [1] a--b a--c b--c a--b b--c a--a

Plot

plot(g, 
     layout          = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color    = V(g)$color, 
     vertex.size     = V(g)$size, 
     edge.width      = E(g)$weight)

data frame to graph

g  <- graph.data.frame(data.frame(
  id1 = c('Bob', 'Mark', 'Red', 'Mat', 'White', 'White', 'Bob'),    
  di2 = c('Red', 'White', 'Mat', 'Blue', 'Bob', 'Mark', 'Mat')))
# 等同於
# g <- graph(edges = c('Bob'  , 'Red', 
#                      'Mark' , 'White', 
#                      'Red'  , 'Mat', 
#                      'Mat'  , 'Blue', 
#                      'White', 'Bob', 
#                      'White', 'Mark', 
#                      'Bob'  , 'Mat'), directed = TRUE)
plot(g, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color = 'lightblue', 
     vertex.size = 25)

g[]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##       Bob Mark Red Mat White Blue
## Bob     .    .   1   1     .    .
## Mark    .    .   .   .     1    .
## Red     .    .   .   1     .    .
## Mat     .    .   .   .     .    1
## White   1    1   .   .     .    .
## Blue    .    .   .   .     .    .

matrix to graph

# 隨便建造的一個矩陣
adjm <- matrix(sample(0:1, 100, replace = TRUE, prob = c(0.9, 0.1)), nc = 10)
## 對矩陣行列命名
rownames(adjm) <- sample(letters, nrow(adjm))
colnames(adjm) <- rownames(adjm) 
# matrix轉成graph
g1 <- graph_from_adjacency_matrix(adjm, weighted = TRUE, 
                                  mode = "undirected", add.rownames = TRUE)
# weighted是否需要加入權重
# mode有directed, undirected, upper, lower, max, min, plus有這麼幾種
# add.rownames或add.colnames定義名稱
g1
## IGRAPH 653e607 UNW- 10 15 -- 
## + attr: name (v/c), TRUE (v/c), weight (e/n)
## + edges from 653e607 (vertex names):
##  [1] s--s s--m s--p s--q s--l t--f t--q t--e m--e r--l r--e f--y q--q q--l
## [15] y--e
plot(g1, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color = 'white', 
     vertex.size = 25)

combine graphs

g1 <- graph.full(4)
g2 <- graph.ring(3)
g <- g1 %du% g2
plot(g)

# Find different
graph.difference(g1, g2, directed = F)
## IGRAPH dfe5e0e U--- 4 3 -- Full graph
## + attr: name (g/c), loops (g/l)
## + edges from dfe5e0e:
## [1] 1--4 2--4 3--4

cluster

g <- make_tree(40, children = 3, mode = "undirected")
# 建構模型
# 其他分群方法
# cluster_label_prop
# cluster_fast_greedy
# spinglass.community
ceb <- cluster_edge_betweenness(g); ceb 
## IGRAPH clustering edge betweenness, groups: 7, mod: 0.68
## + groups:
##   $`1`
##   [1]  1  4 13 38 39 40
##   
##   $`2`
##   [1]  2  6  7 17 18 19 20 21 22
##   
##   $`3`
##   [1]  3  9 10 26 27 28 29 30 31
##   
##   $`4`
##   + ... omitted several groups/vertices
dendPlot(ceb)

plot(ceb, g)

other packages

networkD3

library(networkD3) 
data(MisLinks)
data(MisNodes)
forceNetwork(Links = MisLinks, Nodes = MisNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 0.4, zoom = TRUE)
MyNodes = data.frame(name  = c("Google", "Apple", "Amazon", "Youtube", "Paypal"), 
                     group = c("A", "B", "C", "A", "C"), 
                     size  = c(100, 25, 9, 1, 4))
# 描述圈圈間的聯結,source必定大於target
MyLinks = data.frame(source = c(1, 2, 2, 3), 
                     target = c(0, 0, 1, 0), 
                     value  = c(1, 2, 5, 10)) # value代表線的粗細
forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 1, zoom = TRUE, 
             Nodesize = "size", opacityNoHover = 1) # opacityNoHover 顯示Node ID

qgraph

dataframe要轉成matrix
library(qgraph)

adj = matrix(sample(0:1, 10^2, TRUE, prob = c(0.8, 0.2)), nrow = 10, ncol = 10)
qgraph(adj)
title("Unweighted and directed graphs", line = 2.5)

# Save plot to nonsquare pdf file
qgraph(adj, filetype = 'pdf', height = 5, width = 10)

case study

classroom data

學生互動與成績的關聯

dta <- read.table("student.txt", header = T, sep = ",", row.names = 1)
dta[is.na(dta)] <- 0
head(dta)
# 將成績分成四個等級,成績越好顏色越紅
exam <- read.csv("examscore.csv", header = F) %>% 
  mutate(beh = cut(V2, breaks = quantile(V2, probs = c(0, .25, .5, .75, 1))), 
         col = as.character(factor(beh, labels = c(rev(heat.colors(4))))))
g1 <- graph_from_adjacency_matrix(data.matrix(dta), mode = "undirected")
plot(g1, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color = exam$col, 
     vertex.label.color = "black", 
     vertex.size = degree(g1))

Big5

library("psych")
data(big5)
data(big5groups)
dim(big5)
## [1] 500 240
# 看相關
# round(cor(big5), 2)
# Correlations來畫圖
big5Graph <- qgraph(cor(big5), minimum = 0.3, groups = big5groups, 
                    legend = TRUE, borders = TRUE, title = "Big 5 correlations")

# highlight positive
qgraph(big5Graph, posCol = "red", negCol = "gray")

# highlight negative
qgraph(big5Graph, posCol = "gray", negCol = "blue")

principle analysis

因素分析程序:決定因素數目、因素抽取、轉軸、解釋

# 先利用平行分析決定因素數目
fa.parallel(big5, fa = "pc", show.legend = FALSE)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  15
# 15 
# 決定因素數後,試作因素分析
# 因素抽取設定為主軸因子法,轉軸是最優轉軸法(斜交)
rst <- fa(big5, nfactor = 15, fm = 'pa', rotate = "promax")
## Loading required namespace: GPArotation
# 利用因素負載排序後呈現,隱藏某些較小係數
# print.psych(rst, cut = .3, sort = TRUE)

將因素分析結果畫成圖

loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>% 
  mutate(subject = row.names(loadings)) %>% 
  gather(factor, loading, -subject)
Graph <- qgraph(t, minimum = 0.3, groups = big5groups, 
                legend = TRUE, borders = TRUE, title = "")

t <- t %>% 
  mutate(grp = substr(subject,1,1),
         filter = ifelse(loading >= 0.3, 1, 0))
library(lattice)
stripplot(loading ~ grp | factor, data = t,
          groups = filter,
          jitter = T, layout = c(3, 2))

試試看五因素

rst <- fa(big5, nfactor = 5, fm = 'pa', rotate = "promax")
## Loading required namespace: GPArotation
# 太長了用圖表示
# print.psych(rst, cut = .3, sort = TRUE) 

loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>% 
  mutate(subject = row.names(loadings)) %>% 
  gather(factor, loading, -subject)
Graph <- qgraph(t, minimum = 0.3, groups = big5groups, 
                legend = TRUE, borders = TRUE, title = "")

t <- t %>% 
  mutate(grp = substr(subject,1,1),
         filter = ifelse(loading >= 0.3, 1, 0))
stripplot(loading ~ grp | factor, data = t,
          groups = filter,
          jitter = T, layout = c(3, 2))

復原力、滿意感與自尊量表

dta <- read.csv("edu.csv", header = T)
head(dta)
group <- list()
group$S <- seq(1, 12, 1)
group$R <- seq(13, 30, 1)
group$W <- seq(31, 36, 1)
# Correlations來畫圖
Graph <- qgraph(cor(dta), minimum = 0.4, groups = group, 
                legend = TRUE, borders = TRUE, title = "")

# Highlight negative
qgraph(Graph, posCol = "gray", negCol = "blue")

principle analysis

fa.parallel(dta, fa = "pc", show.legend = FALSE)
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  4
# 4
names(dta) <- c('說心情', '講話困難', '不會表現', '與人相處', '認識交談', '保持聯繫', '重要成員', '被看重的', '有貢獻的', '喜歡我', '重視意見', '喜歡表現', '全力以赴', '不放棄', '解決問題', '達成目標', '找尋資源', '承擔後果', '問題解決', '鼓勵自己', '保持平靜', '往好處想', '理想冒險', '相信幫我', '友人關心', '別人快樂', '常給建議', '感謝支持', '學習技能', '感激別人', '充滿喜悅', '神采飛揚', '快樂的人', '生活沈穩', '滿意生活', '生活豐盛')
rst <- fa(dta, nfactor = 4, fm = 'pa', rotate = "promax")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
print.psych(rst, cut = .3, sort = TRUE)
## Factor Analysis using method =  pa
## Call: fa(r = dta, nfactors = 4, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          item   PA1   PA2   PA3   PA4   h2   u2 com
## 學習技能   29  0.89                   0.68 0.32 1.1
## 感謝支持   28  0.82                   0.58 0.42 1.1
## 解決問題   15  0.82                   0.57 0.43 1.2
## 不放棄     14  0.76                   0.56 0.44 1.1
## 感激別人   30  0.75                   0.55 0.45 1.0
## 達成目標   16  0.69                   0.59 0.41 1.1
## 承擔後果   18  0.69                   0.33 0.67 1.1
## 理想冒險   23  0.66              0.33 0.56 0.44 1.7
## 問題解決   19  0.65                   0.38 0.62 1.1
## 全力以赴   13  0.63                   0.63 0.37 1.3
## 找尋資源   17  0.60                   0.56 0.44 1.1
## 鼓勵自己   20  0.55                   0.46 0.54 1.3
## 保持平靜   21  0.49                   0.30 0.70 1.3
## 別人快樂   26  0.47                   0.55 0.45 1.5
## 常給建議   27  0.46        0.35       0.48 0.52 2.2
## 往好處想   22  0.44                   0.29 0.71 1.8
## 友人關心   25  0.35        0.33       0.47 0.53 3.2
## 滿意生活   35        0.99             0.82 0.18 1.0
## 生活豐盛   36        0.99             0.82 0.18 1.0
## 生活沈穩   34        0.91             0.73 0.27 1.0
## 神采飛揚   32        0.77             0.72 0.28 1.1
## 快樂的人   33        0.73             0.59 0.41 1.1
## 充滿喜悅   31        0.69        0.38 0.80 0.20 1.7
## 喜歡我     10              0.97       0.80 0.20 1.1
## 被看重的    8              0.83       0.66 0.34 1.1
## 重視意見   11              0.81       0.66 0.34 1.1
## 重要成員    7              0.78       0.64 0.36 1.4
## 有貢獻的    9              0.67       0.61 0.39 1.2
## 相信幫我   24              0.48       0.57 0.43 1.7
## 喜歡表現   12              0.45       0.49 0.51 1.5
## 不會表現    3                    0.81 0.70 0.30 1.0
## 認識交談    5                    0.70 0.51 0.49 1.0
## 講話困難    2                    0.67 0.40 0.60 1.1
## 說心情      1                    0.59 0.40 0.60 1.1
## 與人相處    4                    0.57 0.56 0.44 1.2
## 保持聯繫    6                    0.49 0.33 0.67 1.4
## 
##                        PA1  PA2  PA3  PA4
## SS loadings           7.56 4.57 4.55 3.63
## Proportion Var        0.21 0.13 0.13 0.10
## Cumulative Var        0.21 0.34 0.46 0.56
## Proportion Explained  0.37 0.23 0.22 0.18
## Cumulative Proportion 0.37 0.60 0.82 1.00
## 
##  With factor correlations of 
##      PA1  PA2  PA3  PA4
## PA1 1.00 0.55 0.61 0.55
## PA2 0.55 1.00 0.47 0.48
## PA3 0.61 0.47 1.00 0.54
## PA4 0.55 0.48 0.54 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  630  and the objective function was  49.91 with Chi Square of  6795.73
## The degrees of freedom for the model are 492  and the objective function was  27.3 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  150 with the empirical chi square  423.05  with prob <  0.99 
## The total number of observations was  150  with Likelihood Chi Square =  3644.11  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.331
## RMSEA index =  0.207  and the 90 % confidence intervals are  0.201 0.214
## BIC =  1178.87
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1  PA2  PA3  PA4
## Correlation of (regression) scores with factors   0.97 0.98 0.97 0.95
## Multiple R square of scores with factors          0.95 0.95 0.93 0.90
## Minimum correlation of possible factor scores     0.89 0.91 0.87 0.79
loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>% 
  rename("復原力" = 1, "滿意感" = 2, "自尊一" = 3, "自尊二" = 4) %>%
  mutate(s = row.names(loadings)) %>% 
  gather(value, target, -s)
par(family = "黑體-繁 中黑")
Graph <- qgraph(t, minimum = 0.2, groups = group, 
                legend = TRUE, borders = TRUE, title = "", 
                vertex.label.family = "黑體-繁 中黑")
## Warning in qgraph(t, minimum = 0.2, groups = group, legend = TRUE, borders
## = TRUE, : The following arguments are not documented and likely not
## arguments of qgraph and thus ignored: vertex.label.family

商場購物資料集

dat <- read.csv("Rules.csv")
head(dat)

產生Nodes

# 所有商品
all.items = c(as.character(dat[, 1]), 
              as.character(dat[, 2]), 
              as.character(dat[, 3]), 
              as.character(dat[, 4]), 
              as.character(dat[, 5]), 
              as.character(dat[, 6]))
# 所有種類
levels.all = levels(as.factor(all.items))
# 去掉 ""
levels.all = levels.all[-1]
# 建立架構
MyNodes = data.frame(matrix(NA, ncol = 3, nrow = length(levels.all)))
# 命名
colnames(MyNodes) = c("name", "group", "size")
MyNodes[, 1] = levels.all
# output的商品與類別
out.items = as.character(dat[, 6])
levels.out = levels(dat[, 6])
# 依照分類填入group
MyNodes[levels.all %in% levels.out, 2] = "Output"
MyNodes[!levels.all %in% levels.out, 2] = "Input"
# size都設100
MyNodes[, 3] = c(rep(5, 5), rep(10, 10), rep(100, 5))
head(MyNodes)

畫圖

forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 1, zoom = TRUE, Nodesize = "size", 
             # colourScale = JS("d3.scale.category10()"), 
             legend = TRUE, fontSize = 20,  bounded = FALSE, 
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

NBA

library(scales)
nba = read.csv("data11_2.csv", header = TRUE, row.names = 1)
head(nba)

看相關

library(corrplot)
library(RColorBrewer)
cormatrix.nba = cor(nba)
corrplot(cormatrix.nba, type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

igraph作法

cormatrix.nba[cormatrix.nba < 0.4] = 0
cormatrix.nba[cormatrix.nba ==  1] = 0
g <- graph_from_adjacency_matrix(cormatrix.nba, weighted = TRUE, add.rownames = TRUE)
plot(g, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.2, 
     vertex.color = 'lightgray', vertex.frame.color = NA, 
     vertex.size = 15, vertex.label.cex = 0.8)

networkD3作法

MyNodes <- data.frame(name = rownames(cormatrix.nba), 
                      group = c(1, 2, 2, 2, 2, 1, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 1, 2, 1))
cormatrix.nba.t <- cormatrix.nba
rownames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1
colnames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1

library(reshape2)
MyLinks <- melt(cormatrix.nba.t)
colnames(MyLinks) = c("source", "target", "value")
MyLinks <- MyLinks[MyLinks$value != 0, ] # 無關
MyLinks <- MyLinks[MyLinks$value != 1, ] # 自己

forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 1, zoom = TRUE, 
             # colourScale = JS("d3.scale.category10()"), 
             legend = TRUE, fontSize = 20,  bounded = FALSE, 
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

Higgs Twitter Dataset : Friends/follower

Data Link

dta <- read.table("higgs-social_network.edgelist", header = F)
dta <- dta-1

tmp1 <- dta[dta$V1<50, ]
tmp2 <- dta[dta$V2<50, ]
dta <- merge(tmp1, tmp2)
head(dta)
g <- graph.data.frame(dta, directed = TRUE)
plot(g, layout = layout_with_gem, 
     vertex.size = degree(g), vertex.label = NA, edge.arrow.size = 0.1, )

graph_ggplot(g)
## Warning: Ignoring unknown aesthetics: label

Gephi 2000 nodes

Youtube Video Social Graph in 2007, 2008

Data Link

dta <- read.table('youtube_080727/0.txt', header = F, sep = "\t", fill = TRUE)
colnames(dta) <- c("videoID", "uploader", "age", "category", "length", "views", "rate", "ratings", "comments", c(1:20))
head(dta)
normalize <- function(x){(x-min(x))/(max(x)-min(x))}
size = normalize(dta$views)+
       normalize(dta$ratings)+
       normalize(dta$comments)
node <- data.frame(name  = dta$videoID, 
                   group = dta$category, 
                   size  = size)
link <- dta %>% 
  select(videoID, '1':'20') %>% 
  gather(value, target, -videoID) %>% 
  rename(source = 1) %>%
  mutate(value  = as.character(50/as.integer(value)), 
         target = as.character(target), 
         source = as.character(source))
## Warning: attributes are not identical across measure variables;
## they will be dropped
levels.all <- c(as.character(link[, 1]), as.character(link[, 3]))
levels.all <- levels(as.factor(levels.all))
for (i in c(1, 3)) {
  link[, i] = as.integer(factor(link[, i], levels = levels.all)) - 1
}
node <- node %>% 
  mutate(name = as.character(as.integer(factor(name, levels = levels.all)) - 1), 
         group = as.character(group), 
         size = as.character(as.integer(size)*100))
onode <- data.frame(name = link$target, 
                    group = 'None', 
                    size = 5)
node <- rbind(node, onode)
head(link)
head(node)
forceNetwork(Links = link, Nodes = node, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", Nodesize = "size", opacity = 1, zoom = TRUE, 
             legend = TRUE, fontSize = 20,  bounded = FALSE, arrows = TRUE, 
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

Google Scholar Collaboration Networks

Reference Link

if(!require("devtools")) install.packages("devtools")
library("devtools")
install_github("pablobarbera/scholarnetwork")
## Skipping install of 'scholarnetwork' from a github remote, the SHA1 (11f37da8) has not changed since last install.
##   Use `force = TRUE` to force installation
library(scholarnetwork)

extractNetwork wraps the get_publications function from the scholar package, which extracts the list of publications on a Google Scholar profile, cleans it, and then parses the results into a format that is more suitable for network analysis: a data frame of weighted edges, where each edge is a collaboration in a publication, and the weight is one divided by number of co-authors; and a data frame with node-level information, which includes the group resulting from running a walktrap community detection algorithm.

d <- extractNetwork(id = "aG2KuDYAAAAJ&hl", n = 500)
str(d)
## List of 2
##  $ nodes:'data.frame':   129 obs. of  3 variables:
##   ..$ label : chr [1:129] "A Heathcote" "A Martinez" "A Takahashi" "A Tyler" ...
##   ..$ degree: num [1:129] 0.5 0.833 0.8 0.75 0.857 ...
##   ..$ group : num [1:129] 19 14 13 1 18 2 8 2 2 14 ...
##  $ edges:'data.frame':   482 obs. of  3 variables:
##   ..$ node1 : chr [1:482] "C Sheu" "N Todd" "S Mcmahon" "M Ohnishi" ...
##   ..$ node2 : chr [1:482] "A Heathcote" "A Martinez" "A Martinez" "A Takahashi" ...
##   ..$ weight: num [1:482] 0.5 0.167 0.167 0.2 0.2 ...
plotNetwork(d$nodes, d$edges, file = "network.html")
# cleaning network data
network <- graph_from_data_frame(d$edges, directed = FALSE)
set.seed(123)
l <- layout.fruchterman.reingold(network, niter = 1500) # layout
fc <- walktrap.community(network) # community detection

# node locations
nodes <- data.frame(l); names(nodes) <- c("x", "y")
nodes$cluster <- factor(fc$membership)
nodes$label <- fc$names
nodes$degree <- degree(network)

# edge locations
edgelist <- get.edgelist(network, names = FALSE)
edges <- data.frame(nodes[edgelist[, 1], c("x", "y")], nodes[edgelist[, 2], c("x", "y")])
names(edges) <- c("x1", "y1", "x2", "y2")

# and now visualizing it...
p <- ggplot(nodes, aes(x = x, y = y, color = cluster, label = label, size = degree))
pq <- p + 
  # nodes
  geom_point(color = "grey20", aes(fill = cluster), 
             shape = 21, show.legend = FALSE, alpha = 1/2) +
  geom_text(color = "black", aes(label = label, size = degree), 
                    show.legend = FALSE, alpha = 2/3) +
  # edges
  geom_segment(
    aes(x = x1, y = y1, xend = x2, yend = y2, label = NA), 
    data = edges, size = 0.25, color = "grey20", alpha = 1/5) +
  ## note that here I add a border to the points
  scale_fill_discrete(labels = labels) +
  scale_size_continuous(range = c(2, 8)) +
  theme(
    panel.background = element_rect(fill = "white"), 
    plot.background = element_rect(fill = "white"), 
    axis.line = element_blank(), axis.text = element_blank(), 
    axis.ticks = element_blank(), 
    axis.title = element_blank(), panel.border = element_blank(), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    legend.background = element_rect(colour = F, fill = "black"), 
    legend.key = element_rect(fill = "black", colour = F), 
    legend.title = element_text(color = "white"), 
    legend.text = element_text(color = "white")
  ) +
  ## changing size of points in legend
  guides(fill = guide_legend(override.aes = list(size = 5)))
## Warning: Ignoring unknown aesthetics: label
pq

it is difficult to make sure labels do not overlap. A probably better option is to export the network data to a format that Gephi can read, and then edit it manually in Gephi, as shown below.

df <- data.frame(Source = d$edges$node1, Target = d$edges$node2)
write.csv(df, file = "edgelist-gephi.csv", row.names = FALSE)