## Warning: package 'tibble' was built under R version 3.6.2

## Warning: package 'tidyr' was built under R version 3.6.2

## Warning: package 'purrr' was built under R version 3.6.2

construct graph

# 建立一個空的圖
g<-graph.empty(directed = F)
# 增加node
g<-add.vertices(g, 3)
# 增加edge
g<-add.edges(g, c(c(1, 2, 
                    1, 3, 
                    2, 3, 
                    2, 1, 
                    3, 2, 
                    1, 1))) 
# 設定node的feature
V(g)$label <- c("a", "b", "c")
V(g)$color <- c('red', 'blue', 'orange')
V(g)$size <- c(20, 40, 60)
V(g)$member <- c(1, 2, 1)
# 篩選
V(g)[which(V(g)$member ==  1)]

## + 2/3 vertices, from bedfd20:
## [1] 1 3

V(g)[degree(g) > 1]

## + 3/3 vertices, from bedfd20:
## [1] 1 2 3

# 設定edge的feature
E(g)$weight <- c(2, 8, 13, 4, 11, 3)
# 內建E(g)$weight。但是沒有label項，用set_vertex_attr加入edge label
g<-set_vertex_attr(g, "name", value = V(g)$label)
E(g)

## + 6/6 edges from bedfd20 (vertex names):
## [1] a--b a--c b--c a--b b--c a--a

Plot

plot(g, 
     layout          = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color    = V(g)$color, 
     vertex.size     = V(g)$size, 
     edge.width      = E(g)$weight)

data frame to graph

g  <- graph.data.frame(data.frame(
  id1 = c('Bob', 'Mark', 'Red', 'Mat', 'White', 'White', 'Bob'),    
  di2 = c('Red', 'White', 'Mat', 'Blue', 'Bob', 'Mark', 'Mat')))
# 等同於
# g <- graph(edges = c('Bob'  , 'Red', 
#                      'Mark' , 'White', 
#                      'Red'  , 'Mat', 
#                      'Mat'  , 'Blue', 
#                      'White', 'Bob', 
#                      'White', 'Mark', 
#                      'Bob'  , 'Mat'), directed = TRUE)
plot(g, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color = 'lightblue', 
     vertex.size = 25)

g[]

## 6 x 6 sparse Matrix of class "dgCMatrix"
##       Bob Mark Red Mat White Blue
## Bob     .    .   1   1     .    .
## Mark    .    .   .   .     1    .
## Red     .    .   .   1     .    .
## Mat     .    .   .   .     .    1
## White   1    1   .   .     .    .
## Blue    .    .   .   .     .    .

matrix to graph

# 隨便建造的一個矩陣
adjm <- matrix(sample(0:1, 100, replace = TRUE, prob = c(0.9, 0.1)), nc = 10)
## 對矩陣行列命名
rownames(adjm) <- sample(letters, nrow(adjm))
colnames(adjm) <- rownames(adjm) 
# matrix轉成graph
g1 <- graph_from_adjacency_matrix(adjm, weighted = TRUE, 
                                  mode = "undirected", add.rownames = TRUE)
# weighted是否需要加入權重
# mode有directed, undirected, upper, lower, max, min, plus有這麼幾種
# add.rownames或add.colnames定義名稱
g1

## IGRAPH 653e607 UNW- 10 15 -- 
## + attr: name (v/c), TRUE (v/c), weight (e/n)
## + edges from 653e607 (vertex names):
##  [1] s--s s--m s--p s--q s--l t--f t--q t--e m--e r--l r--e f--y q--q q--l
## [15] y--e

plot(g1, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color = 'white', 
     vertex.size = 25)

combine graphs

g1 <- graph.full(4)
g2 <- graph.ring(3)
g <- g1 %du% g2
plot(g)

# Find different
graph.difference(g1, g2, directed = F)

## IGRAPH dfe5e0e U--- 4 3 -- Full graph
## + attr: name (g/c), loops (g/l)
## + edges from dfe5e0e:
## [1] 1--4 2--4 3--4

cluster

g <- make_tree(40, children = 3, mode = "undirected")
# 建構模型
# 其他分群方法
# cluster_label_prop
# cluster_fast_greedy
# spinglass.community
ceb <- cluster_edge_betweenness(g); ceb

## IGRAPH clustering edge betweenness, groups: 7, mod: 0.68
## + groups:
##   $`1`
##   [1]  1  4 13 38 39 40
##   
##   $`2`
##   [1]  2  6  7 17 18 19 20 21 22
##   
##   $`3`
##   [1]  3  9 10 26 27 28 29 30 31
##   
##   $`4`
##   + ... omitted several groups/vertices

dendPlot(ceb)

plot(ceb, g)

other packages

networkD3

library(networkD3) 
data(MisLinks)
data(MisNodes)
forceNetwork(Links = MisLinks, Nodes = MisNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 0.4, zoom = TRUE)

MyNodes = data.frame(name  = c("Google", "Apple", "Amazon", "Youtube", "Paypal"), 
                     group = c("A", "B", "C", "A", "C"), 
                     size  = c(100, 25, 9, 1, 4))
# 描述圈圈間的聯結，source必定大於target
MyLinks = data.frame(source = c(1, 2, 2, 3), 
                     target = c(0, 0, 1, 0), 
                     value  = c(1, 2, 5, 10)) # value代表線的粗細
forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 1, zoom = TRUE, 
             Nodesize = "size", opacityNoHover = 1) # opacityNoHover 顯示Node ID

qgraph

dataframe要轉成matrix

library(qgraph)

adj = matrix(sample(0:1, 10^2, TRUE, prob = c(0.8, 0.2)), nrow = 10, ncol = 10)
qgraph(adj)
title("Unweighted and directed graphs", line = 2.5)

# Save plot to nonsquare pdf file
qgraph(adj, filetype = 'pdf', height = 5, width = 10)

case study

classroom data

學生互動與成績的關聯

dta <- read.table("student.txt", header = T, sep = ",", row.names = 1)
dta[is.na(dta)] <- 0
head(dta)

# 將成績分成四個等級，成績越好顏色越紅
exam <- read.csv("examscore.csv", header = F) %>% 
  mutate(beh = cut(V2, breaks = quantile(V2, probs = c(0, .25, .5, .75, 1))), 
         col = as.character(factor(beh, labels = c(rev(heat.colors(4))))))

g1 <- graph_from_adjacency_matrix(data.matrix(dta), mode = "undirected")
plot(g1, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.4, 
     vertex.color = exam$col, 
     vertex.label.color = "black", 
     vertex.size = degree(g1))

Big5

library("psych")
data(big5)
data(big5groups)
dim(big5)

## [1] 500 240

# 看相關
# round(cor(big5), 2)

# Correlations來畫圖
big5Graph <- qgraph(cor(big5), minimum = 0.3, groups = big5groups, 
                    legend = TRUE, borders = TRUE, title = "Big 5 correlations")

# highlight positive
qgraph(big5Graph, posCol = "red", negCol = "gray")

# highlight negative
qgraph(big5Graph, posCol = "gray", negCol = "blue")

principle analysis

因素分析程序：決定因素數目、因素抽取、轉軸、解釋

# 先利用平行分析決定因素數目
fa.parallel(big5, fa = "pc", show.legend = FALSE)

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  15

# 15 
# 決定因素數後，試作因素分析
# 因素抽取設定為主軸因子法，轉軸是最優轉軸法（斜交）
rst <- fa(big5, nfactor = 15, fm = 'pa', rotate = "promax")

## Loading required namespace: GPArotation

# 利用因素負載排序後呈現，隱藏某些較小係數
# print.psych(rst, cut = .3, sort = TRUE)

將因素分析結果畫成圖

loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>% 
  mutate(subject = row.names(loadings)) %>% 
  gather(factor, loading, -subject)
Graph <- qgraph(t, minimum = 0.3, groups = big5groups, 
                legend = TRUE, borders = TRUE, title = "")

t <- t %>% 
  mutate(grp = substr(subject,1,1),
         filter = ifelse(loading >= 0.3, 1, 0))
library(lattice)
stripplot(loading ~ grp | factor, data = t,
          groups = filter,
          jitter = T, layout = c(3, 2))

試試看五因素

rst <- fa(big5, nfactor = 5, fm = 'pa', rotate = "promax")

## Loading required namespace: GPArotation

# 太長了用圖表示
# print.psych(rst, cut = .3, sort = TRUE) 

loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>% 
  mutate(subject = row.names(loadings)) %>% 
  gather(factor, loading, -subject)
Graph <- qgraph(t, minimum = 0.3, groups = big5groups, 
                legend = TRUE, borders = TRUE, title = "")

t <- t %>% 
  mutate(grp = substr(subject,1,1),
         filter = ifelse(loading >= 0.3, 1, 0))
stripplot(loading ~ grp | factor, data = t,
          groups = filter,
          jitter = T, layout = c(3, 2))

復原力、滿意感與自尊量表

dta <- read.csv("edu.csv", header = T)
head(dta)

group <- list()
group$S <- seq(1, 12, 1)
group$R <- seq(13, 30, 1)
group$W <- seq(31, 36, 1)
# Correlations來畫圖
Graph <- qgraph(cor(dta), minimum = 0.4, groups = group, 
                legend = TRUE, borders = TRUE, title = "")

# Highlight negative
qgraph(Graph, posCol = "gray", negCol = "blue")

principle analysis

fa.parallel(dta, fa = "pc", show.legend = FALSE)

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## In factor.scores, the correlation matrix is singular, an approximation is used

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

## Parallel analysis suggests that the number of factors =  NA  and the number of components =  4

# 4
names(dta) <- c('說心情', '講話困難', '不會表現', '與人相處', '認識交談', '保持聯繫', '重要成員', '被看重的', '有貢獻的', '喜歡我', '重視意見', '喜歡表現', '全力以赴', '不放棄', '解決問題', '達成目標', '找尋資源', '承擔後果', '問題解決', '鼓勵自己', '保持平靜', '往好處想', '理想冒險', '相信幫我', '友人關心', '別人快樂', '常給建議', '感謝支持', '學習技能', '感激別人', '充滿喜悅', '神采飛揚', '快樂的人', '生活沈穩', '滿意生活', '生活豐盛')
rst <- fa(dta, nfactor = 4, fm = 'pa', rotate = "promax")

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done

## In factor.scores, the correlation matrix is singular, an approximation is used

## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done

print.psych(rst, cut = .3, sort = TRUE)

## Factor Analysis using method =  pa
## Call: fa(r = dta, nfactors = 4, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          item   PA1   PA2   PA3   PA4   h2   u2 com
## 學習技能   29  0.89                   0.68 0.32 1.1
## 感謝支持   28  0.82                   0.58 0.42 1.1
## 解決問題   15  0.82                   0.57 0.43 1.2
## 不放棄     14  0.76                   0.56 0.44 1.1
## 感激別人   30  0.75                   0.55 0.45 1.0
## 達成目標   16  0.69                   0.59 0.41 1.1
## 承擔後果   18  0.69                   0.33 0.67 1.1
## 理想冒險   23  0.66              0.33 0.56 0.44 1.7
## 問題解決   19  0.65                   0.38 0.62 1.1
## 全力以赴   13  0.63                   0.63 0.37 1.3
## 找尋資源   17  0.60                   0.56 0.44 1.1
## 鼓勵自己   20  0.55                   0.46 0.54 1.3
## 保持平靜   21  0.49                   0.30 0.70 1.3
## 別人快樂   26  0.47                   0.55 0.45 1.5
## 常給建議   27  0.46        0.35       0.48 0.52 2.2
## 往好處想   22  0.44                   0.29 0.71 1.8
## 友人關心   25  0.35        0.33       0.47 0.53 3.2
## 滿意生活   35        0.99             0.82 0.18 1.0
## 生活豐盛   36        0.99             0.82 0.18 1.0
## 生活沈穩   34        0.91             0.73 0.27 1.0
## 神采飛揚   32        0.77             0.72 0.28 1.1
## 快樂的人   33        0.73             0.59 0.41 1.1
## 充滿喜悅   31        0.69        0.38 0.80 0.20 1.7
## 喜歡我     10              0.97       0.80 0.20 1.1
## 被看重的    8              0.83       0.66 0.34 1.1
## 重視意見   11              0.81       0.66 0.34 1.1
## 重要成員    7              0.78       0.64 0.36 1.4
## 有貢獻的    9              0.67       0.61 0.39 1.2
## 相信幫我   24              0.48       0.57 0.43 1.7
## 喜歡表現   12              0.45       0.49 0.51 1.5
## 不會表現    3                    0.81 0.70 0.30 1.0
## 認識交談    5                    0.70 0.51 0.49 1.0
## 講話困難    2                    0.67 0.40 0.60 1.1
## 說心情      1                    0.59 0.40 0.60 1.1
## 與人相處    4                    0.57 0.56 0.44 1.2
## 保持聯繫    6                    0.49 0.33 0.67 1.4
## 
##                        PA1  PA2  PA3  PA4
## SS loadings           7.56 4.57 4.55 3.63
## Proportion Var        0.21 0.13 0.13 0.10
## Cumulative Var        0.21 0.34 0.46 0.56
## Proportion Explained  0.37 0.23 0.22 0.18
## Cumulative Proportion 0.37 0.60 0.82 1.00
## 
##  With factor correlations of 
##      PA1  PA2  PA3  PA4
## PA1 1.00 0.55 0.61 0.55
## PA2 0.55 1.00 0.47 0.48
## PA3 0.61 0.47 1.00 0.54
## PA4 0.55 0.48 0.54 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 4 factors are sufficient.
## 
## The degrees of freedom for the null model are  630  and the objective function was  49.91 with Chi Square of  6795.73
## The degrees of freedom for the model are 492  and the objective function was  27.3 
## 
## The root mean square of the residuals (RMSR) is  0.05 
## The df corrected root mean square of the residuals is  0.05 
## 
## The harmonic number of observations is  150 with the empirical chi square  423.05  with prob <  0.99 
## The total number of observations was  150  with Likelihood Chi Square =  3644.11  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  0.331
## RMSEA index =  0.207  and the 90 % confidence intervals are  0.201 0.214
## BIC =  1178.87
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy             
##                                                    PA1  PA2  PA3  PA4
## Correlation of (regression) scores with factors   0.97 0.98 0.97 0.95
## Multiple R square of scores with factors          0.95 0.95 0.93 0.90
## Minimum correlation of possible factor scores     0.89 0.91 0.87 0.79

loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>% 
  rename("復原力" = 1, "滿意感" = 2, "自尊一" = 3, "自尊二" = 4) %>%
  mutate(s = row.names(loadings)) %>% 
  gather(value, target, -s)
par(family = "黑體-繁 中黑")
Graph <- qgraph(t, minimum = 0.2, groups = group, 
                legend = TRUE, borders = TRUE, title = "", 
                vertex.label.family = "黑體-繁 中黑")

## Warning in qgraph(t, minimum = 0.2, groups = group, legend = TRUE, borders
## = TRUE, : The following arguments are not documented and likely not
## arguments of qgraph and thus ignored: vertex.label.family

商場購物資料集

dat <- read.csv("Rules.csv")
head(dat)

產生Nodes

# 所有商品
all.items = c(as.character(dat[, 1]), 
              as.character(dat[, 2]), 
              as.character(dat[, 3]), 
              as.character(dat[, 4]), 
              as.character(dat[, 5]), 
              as.character(dat[, 6]))
# 所有種類
levels.all = levels(as.factor(all.items))
# 去掉 ""
levels.all = levels.all[-1]
# 建立架構
MyNodes = data.frame(matrix(NA, ncol = 3, nrow = length(levels.all)))
# 命名
colnames(MyNodes) = c("name", "group", "size")
MyNodes[, 1] = levels.all
# output的商品與類別
out.items = as.character(dat[, 6])
levels.out = levels(dat[, 6])
# 依照分類填入group
MyNodes[levels.all %in% levels.out, 2] = "Output"
MyNodes[!levels.all %in% levels.out, 2] = "Input"
# size都設100
MyNodes[, 3] = c(rep(5, 5), rep(10, 10), rep(100, 5))
head(MyNodes)

整理Links

# 將商品數字化
for (i in 1:6) {
  dat[, i] = as.integer(factor(dat[, i], levels = levels.all)) - 1
}
# 逐行做成 大的node｜小的node｜lift 格式
Links = NULL
for (i in 1:nrow(dat)) {
  n.rule = sum(!is.na(dat[i, 1:5]))
  new.rule = matrix(NA, ncol = 3, nrow = n.rule)
  for (k in 1:n.rule) {
    item1 = dat[i, k]
    item2 = dat[i, 6]
    new.rule[k, 1] = max(item1, item2)
    new.rule[k, 2] = min(item1, item2)
    new.rule[k, 3] = dat[i, 9]
  }
  Links = rbind(Links, new.rule)
}

#將重複的相加在一起
dup.MyLinks = Links[duplicated(Links[, 1:2]), ]
nondup.MyLinks = Links[!duplicated(Links[, 1:2]), ]
for (i in 1:nrow(dup.MyLinks)) {
  dup.pos = which(nondup.MyLinks[, 1] == dup.MyLinks[i, 1] & 
                  nondup.MyLinks[, 2] == dup.MyLinks[i, 2])
  nondup.MyLinks[dup.pos, 3] = nondup.MyLinks[dup.pos, 3] + dup.MyLinks[i, 3]
}

#重新格式化
colnames(nondup.MyLinks) = c("source", "target", "value")
MyLinks = data.frame(nondup.MyLinks)
head(MyLinks)

畫圖

forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 1, zoom = TRUE, Nodesize = "size", 
             # colourScale = JS("d3.scale.category10()"), 
             legend = TRUE, fontSize = 20,  bounded = FALSE, 
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

NBA

library(scales)
nba = read.csv("data11_2.csv", header = TRUE, row.names = 1)
head(nba)

看相關

library(corrplot)
library(RColorBrewer)
cormatrix.nba = cor(nba)
corrplot(cormatrix.nba, type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)

igraph作法

cormatrix.nba[cormatrix.nba < 0.4] = 0
cormatrix.nba[cormatrix.nba ==  1] = 0
g <- graph_from_adjacency_matrix(cormatrix.nba, weighted = TRUE, add.rownames = TRUE)
plot(g, 
     layout = layout.fruchterman.reingold, 
     edge.arrow.size = 0.2, 
     vertex.color = 'lightgray', vertex.frame.color = NA, 
     vertex.size = 15, vertex.label.cex = 0.8)

networkD3作法

MyNodes <- data.frame(name = rownames(cormatrix.nba), 
                      group = c(1, 2, 2, 2, 2, 1, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 1, 2, 1))
cormatrix.nba.t <- cormatrix.nba
rownames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1
colnames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1

library(reshape2)
MyLinks <- melt(cormatrix.nba.t)
colnames(MyLinks) = c("source", "target", "value")
MyLinks <- MyLinks[MyLinks$value != 0, ] # 無關
MyLinks <- MyLinks[MyLinks$value != 1, ] # 自己

forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", opacity = 1, zoom = TRUE, 
             # colourScale = JS("d3.scale.category10()"), 
             legend = TRUE, fontSize = 20,  bounded = FALSE, 
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

Higgs Twitter Dataset : Friends/follower

Data Link

dta <- read.table("higgs-social_network.edgelist", header = F)
dta <- dta-1

tmp1 <- dta[dta$V1<50, ]
tmp2 <- dta[dta$V2<50, ]
dta <- merge(tmp1, tmp2)
head(dta)

g <- graph.data.frame(dta, directed = TRUE)
plot(g, layout = layout_with_gem, 
     vertex.size = degree(g), vertex.label = NA, edge.arrow.size = 0.1, )

graph_ggplot(g)

## Warning: Ignoring unknown aesthetics: label

Gephi 2000 nodes

Youtube Video Social Graph in 2007, 2008

Data Link

dta <- read.table('youtube_080727/0.txt', header = F, sep = "\t", fill = TRUE)
colnames(dta) <- c("videoID", "uploader", "age", "category", "length", "views", "rate", "ratings", "comments", c(1:20))
head(dta)

normalize <- function(x){(x-min(x))/(max(x)-min(x))}
size = normalize(dta$views)+
       normalize(dta$ratings)+
       normalize(dta$comments)
node <- data.frame(name  = dta$videoID, 
                   group = dta$category, 
                   size  = size)
link <- dta %>% 
  select(videoID, '1':'20') %>% 
  gather(value, target, -videoID) %>% 
  rename(source = 1) %>%
  mutate(value  = as.character(50/as.integer(value)), 
         target = as.character(target), 
         source = as.character(source))

## Warning: attributes are not identical across measure variables;
## they will be dropped

levels.all <- c(as.character(link[, 1]), as.character(link[, 3]))
levels.all <- levels(as.factor(levels.all))
for (i in c(1, 3)) {
  link[, i] = as.integer(factor(link[, i], levels = levels.all)) - 1
}
node <- node %>% 
  mutate(name = as.character(as.integer(factor(name, levels = levels.all)) - 1), 
         group = as.character(group), 
         size = as.character(as.integer(size)*100))
onode <- data.frame(name = link$target, 
                    group = 'None', 
                    size = 5)
node <- rbind(node, onode)
head(link)

head(node)

forceNetwork(Links = link, Nodes = node, Source = "source", 
             Target = "target", Value = "value", NodeID = "name", 
             Group = "group", Nodesize = "size", opacity = 1, zoom = TRUE, 
             legend = TRUE, fontSize = 20,  bounded = FALSE, arrows = TRUE, 
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

Google Scholar Collaboration Networks

Reference Link

if(!require("devtools")) install.packages("devtools")
library("devtools")
install_github("pablobarbera/scholarnetwork")

## Skipping install of 'scholarnetwork' from a github remote, the SHA1 (11f37da8) has not changed since last install.
##   Use `force = TRUE` to force installation

library(scholarnetwork)

extractNetwork wraps the get_publications function from the scholar package, which extracts the list of publications on a Google Scholar profile, cleans it, and then parses the results into a format that is more suitable for network analysis: a data frame of weighted edges, where each edge is a collaboration in a publication, and the weight is one divided by number of co-authors; and a data frame with node-level information, which includes the group resulting from running a walktrap community detection algorithm.

d <- extractNetwork(id = "aG2KuDYAAAAJ&hl", n = 500)
str(d)

## List of 2
##  $ nodes:'data.frame':   129 obs. of  3 variables:
##   ..$ label : chr [1:129] "A Heathcote" "A Martinez" "A Takahashi" "A Tyler" ...
##   ..$ degree: num [1:129] 0.5 0.833 0.8 0.75 0.857 ...
##   ..$ group : num [1:129] 19 14 13 1 18 2 8 2 2 14 ...
##  $ edges:'data.frame':   482 obs. of  3 variables:
##   ..$ node1 : chr [1:482] "C Sheu" "N Todd" "S Mcmahon" "M Ohnishi" ...
##   ..$ node2 : chr [1:482] "A Heathcote" "A Martinez" "A Martinez" "A Takahashi" ...
##   ..$ weight: num [1:482] 0.5 0.167 0.167 0.2 0.2 ...

plotNetwork(d$nodes, d$edges, file = "network.html")

# cleaning network data
network <- graph_from_data_frame(d$edges, directed = FALSE)
set.seed(123)
l <- layout.fruchterman.reingold(network, niter = 1500) # layout
fc <- walktrap.community(network) # community detection

# node locations
nodes <- data.frame(l); names(nodes) <- c("x", "y")
nodes$cluster <- factor(fc$membership)
nodes$label <- fc$names
nodes$degree <- degree(network)

# edge locations
edgelist <- get.edgelist(network, names = FALSE)
edges <- data.frame(nodes[edgelist[, 1], c("x", "y")], nodes[edgelist[, 2], c("x", "y")])
names(edges) <- c("x1", "y1", "x2", "y2")

# and now visualizing it...
p <- ggplot(nodes, aes(x = x, y = y, color = cluster, label = label, size = degree))
pq <- p + 
  # nodes
  geom_point(color = "grey20", aes(fill = cluster), 
             shape = 21, show.legend = FALSE, alpha = 1/2) +
  geom_text(color = "black", aes(label = label, size = degree), 
                    show.legend = FALSE, alpha = 2/3) +
  # edges
  geom_segment(
    aes(x = x1, y = y1, xend = x2, yend = y2, label = NA), 
    data = edges, size = 0.25, color = "grey20", alpha = 1/5) +
  ## note that here I add a border to the points
  scale_fill_discrete(labels = labels) +
  scale_size_continuous(range = c(2, 8)) +
  theme(
    panel.background = element_rect(fill = "white"), 
    plot.background = element_rect(fill = "white"), 
    axis.line = element_blank(), axis.text = element_blank(), 
    axis.ticks = element_blank(), 
    axis.title = element_blank(), panel.border = element_blank(), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    legend.background = element_rect(colour = F, fill = "black"), 
    legend.key = element_rect(fill = "black", colour = F), 
    legend.title = element_text(color = "white"), 
    legend.text = element_text(color = "white")
  ) +
  ## changing size of points in legend
  guides(fill = guide_legend(override.aes = list(size = 5)))

## Warning: Ignoring unknown aesthetics: label

pq

it is difficult to make sure labels do not overlap. A probably better option is to export the network data to a format that Gephi can read, and then edit it manually in Gephi, as shown below.

df <- data.frame(Source = d$edges$node1, Target = d$edges$node2)
write.csv(df, file = "edgelist-gephi.csv", row.names = FALSE)

Graph

0525

construct graph

Plot

data frame to graph

matrix to graph

combine graphs

cluster

other packages

networkD3

qgraph

dataframe要轉成matrix

case study

classroom data

Big5

principle analysis

試試看五因素

復原力、滿意感與自尊量表

principle analysis

商場購物資料集

產生Nodes

整理Links

畫圖

NBA

看相關

igraph作法

networkD3作法

Higgs Twitter Dataset : Friends/follower

Gephi 2000 nodes

Youtube Video Social Graph in 2007, 2008

Google Scholar Collaboration Networks