社交網絡分析是一種無監督的機器學習方法,有一些類似於機器學習中的KNN,識別出不同的網絡群體,可以應用在 FB推薦朋友、或是商品推薦、影片推薦、甚至是疾病傳播、反詐欺等等。 社交網絡通常使用圖來描述,圖可以非常直觀的描述事物之間的關係。在圖中,節點(node)表示一個人,或者一個事物,邊(edge)代表人或者事物之間的關係。

library(statnet)
library(igraph) 
# detach(package:igraph)
# setwd("/Users/Lynn/Documents/資料管理/midtermReport")

建構graph

# 建立一個空的圖
g<-graph.empty(directed=F)
# 增加node
g<-add.vertices(g,3)
# 增加edge
g<-add.edges(g,c(c(1,2,1,3,2,3,2,1,3,2,1,1))) 

# 可以用XQuartz修改
# tkplot(g)
# rglplot(g)

# 設定node的feature
V(g)$label <- c("a","b","c")
V(g)$color <- c('red','blue','orange')
V(g)$size <- c(20,40,60)
V(g)$member <- c(1,2,1)
# 篩選
V(g)[which(V(g)$member==1)]
## + 2/3 vertices, from 5ff4a0a:
## [1] 1 3
V(g)[degree(g)>1]
## + 3/3 vertices, from 5ff4a0a:
## [1] 1 2 3
# 刪除不要的node
g<-g-V(g)[degree(g)==0]
# 檢查2點之間是否連接。0沒有1有
edge.connectivity(g, 1,3)
## [1] 3
# 相鄰點的集合——neighbors
neighbors(g,v=which(V(g)$label=="a"))
## + 5/3 vertices, from c5d13ad:
## [1] 1 1 2 2 3
# default mode是out,還有in,total
V(g)[neighbors(g,v=which(V(g)$label=="b"),mode="total")]
## + 4/3 vertices, from c5d13ad:
## [1] 1 1 3 3
# 線是否能夠指回自己,1-1就是指回自己
which_loop(g) 
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE
# 是否有重複線,後面1-1與前面1-1重複了
which_multiple(g) 
## [1] FALSE FALSE FALSE  TRUE  TRUE FALSE
# 設定edge的weight
E(g)$weight <- c(2,8,13,4,11,3)
# 內建E(g)$weight。但是沒有label項,用set_vertex_attr加入edge label
g<-set_vertex_attr(g,"name",value=V(g)$label)
# 篩選與刪除
E(g)
## + 6/6 edges from c5d13ad (vertex names):
## [1] a--b a--c b--c a--b b--c a--a
E(g)[E(g)$weight>2]
## + 5/6 edges from c5d13ad (vertex names):
## [1] a--c b--c a--b b--c a--a
g<-g-E(g)[E(g)$weight==1] 
E(g)
## + 6/6 edges from bb41441 (vertex names):
## [1] a--b a--c b--c a--b b--c a--a
# 線的重複情況。不重複是1,重複一次+1
count.multiple(g) 
## [1] 2 1 2 2 2 1
# 簡化網絡,loops指回自己,比如1-2-1;multiple是指重複比如1-2,2-1。edge.attr.comb,使用重複次數來更新edge權重E(g)$weight
gsim <- simplify(g,remove.multiple = TRUE, remove.loops = TRUE, edge.attr.comb = "mean") 
E(gsim)$weight
## [1]  3  8 12

繪圖

plot(g, 
     layout=layout.fruchterman.reingold,
     edge.arrow.size=0.4,
     vertex.color=V(g)$color,
     vertex.size=V(g)$size,
     edge.width=E(g)$weight)

繪圖參數

layout表示佈局方式(發散性)

mark.groups 表示設置分組

NODES

vertex.color 節點顏色(display.brewer.all()看配色)

vertex.frame.color 節點邊框顏色

vertex.shape 節點形狀,可以參考names(igraph:::.igraph.shapes)

vertex.size 節點大小(默認為15)

vertex.label 默認顯示id,可以自己設定

vertex.label.family 字體類型

vertex.label.font 2粗體 3斜體 4粗體斜體

vertex.label.cex 字體大小

vertex.label.color 字體顏色

vertex.label.dist 點label和節點之間的距離一般0.1,錯開重疊

vertex.label.degree label相對於頂點的位置,0右,“pi”左,“pi/2”下,“-pi/2”上

EDGES

edge.color edge顏色

edge.width 邊緣寬度,默認為1

edge.arrow.size 箭頭大小,默認為1

edge.arrow.width 箭頭寬度,默認為1

edge.lty 0 “” 1“實心” 2“虛線” 3“點綴” 4“dotdash” 5“longdash” 6“twodash”

edge.label

edge.label.family

edge.label.font

edge.label.cex

edge.curved 邊緣曲率,範圍0-1(FALSE=0,TRUE=0.5)

arrow.mode 是否應該有箭頭,0沒有箭頭,1後退,2前進,3兩者

edge.arrow.mode = “-” 箭頭換成線

data.frame轉成graph

g  <- graph.data.frame(data.frame(id1=c('Bob','Mark','Red','Mat','White','White','Bob'),    di2=c('Red','White','Mat','Blue','Bob','Mark','Mat')))
# 等同於
# g <- graph(edges=c('Bob','Red','Mark','White','Red','Mat','Mat','Blue','White','Bob','White','Mark','Bob','Mat'), directed = TRUE)
plot(g, 
     layout=layout.fruchterman.reingold,
     edge.arrow.size=0.4,
     vertex.color='lightblue',
     vertex.size=25)

g[]
## 6 x 6 sparse Matrix of class "dgCMatrix"
##       Bob Mark Red Mat White Blue
## Bob     .    .   1   1     .    .
## Mark    .    .   .   .     1    .
## Red     .    .   .   1     .    .
## Mat     .    .   .   .     .    1
## White   1    1   .   .     .    .
## Blue    .    .   .   .     .    .

matrix轉成graph

# 隨便構造的一個矩陣
adjm <- matrix(sample(0:1, 100, replace=TRUE, prob=c(0.9,0.1)), nc=10)
# # matrix轉成graph
g1 <- graph_from_adjacency_matrix(adjm, weighted=TRUE, mode="undirected")
# weighted是否需要加入權重
# mode有directed, undirected, upper, lower, max, min, plus有這麼幾種
g1
## IGRAPH 5a274d2 U-W- 10 9 -- 
## + attr: weight (e/n)
## + edges from 5a274d2:
## [1] 1-- 3 1-- 8 1--10 3-- 4 3-- 6 3-- 9 4--10 7--10 9-- 9
plot(g1, 
     layout=layout.fruchterman.reingold,
     edge.arrow.size=0.4,
     vertex.color='white',
     vertex.size=25)

## 對矩陣行列命名
rownames(adjm) <- sample(letters, nrow(adjm))
colnames(adjm) <- seq(ncol(adjm))
# add.rownames以及add.colnames定義名稱,用V(g10)$row以及V(g10)$col可以看到
g10 <- graph_from_adjacency_matrix(adjm, weighted=TRUE, add.rownames="row",add.colnames="col")
g10
## IGRAPH 75488b7 D-W- 10 10 -- 
## + attr: col (v/c), row (v/c), weight (e/n)
## + edges from 75488b7:
##  [1]  1-> 3  1->10  4-> 3  4->10  6-> 3  7->10  8-> 1  9-> 3  9-> 9 10-> 1
plot(g10, 
     layout=layout.fruchterman.reingold,
     edge.arrow.size=0.4,
     vertex.color='white',
     vertex.size=25)

graphs 結合

g1 <- graph.full(4)
plot(g1)

g2 <- graph.ring(3)
plot(g2)

g <- g1 %du% g2
plot(g)

# Find different
graph.difference(g1, g2, directed=F)
## IGRAPH 99c50db U--- 4 3 -- Full graph
## + attr: name (g/c), loops (g/l)
## + edges from 99c50db:
## [1] 1--4 2--4 3--4

內建圖形函數

empty

eg <- make_empty_graph(40)
plot(eg, vertex.size=10, vertex.label=NA)

full

fg <- make_full_graph(40)
plot(fg, vertex.size=10, vertex.label=NA)

star

st <- make_star(40)
plot(st, vertex.size=10, vertex.label=NA)

tree

tr <- make_tree(40, children = 3, mode = "undirected")
plot(tr, vertex.size=10, vertex.label=NA)

## ring

rn <- make_ring(40)
plot(rn, vertex.size=10, vertex.label=NA)

Watts-Strogatz

sw <- sample_smallworld(dim=2, size=10, nei=1, p=0.1)
plot(sw, vertex.size=6, vertex.label=NA, layout=layout_in_circle)

Erdos-Renyi Model

# Generate Random Graphs According To The Erdos-Renyi Model
g <- erdos.renyi.game(20, 0.3)
plot(g, layout=layout.fruchterman.reingold, vertex.label=NA, vertex.size=5)

Erdos-Renyi Model:gnm

# Generate random graph, fixed number of arcs
g <- erdos.renyi.game(20, 15, type='gnm') # sample_gnm
plot(g, layout=layout.fruchterman.reingold, vertex.label=NA, vertex.size=5)

Erdos-Renyi Model:gnp

# Generate Random Graphs According To The G(N,P) Erdos-Renyi Model
# different color for each component
g <- sample_gnp(100, 1/100)
comps <- components(g)$membership
colbar <- rainbow(max(comps)+1)
V(g)$color <- colbar[comps+1]
plot(g, layout=layout_with_fr, vertex.size=5, vertex.label=NA)

Barabasi-Albert Model

# Generate Scale-Free Graphs According To The Barabasi-Albert Model
g <- barabasi.game(60, power=1, zero.appeal=1.3)
plot(g, layout=layout.fruchterman.reingold, vertex.label=NA, vertex.size=5, edge.arrow.size=0.2)

lattice

lt = graph.lattice(c(3,4,2))
plot(lt, layout=layout.fruchterman.reingold)

communities

# plot communities in a graph
g <- make_full_graph(5) %du% make_full_graph(5) %du% make_full_graph(5)
g <- add_edges(g, c(1,6, 1,11, 6,11))
com <- cluster_spinglass(g, spins=5)
V(g)$color <- com$membership+1
g <- set_graph_attr(g, "layout", layout_with_kk(g))
plot(g, vertex.label.dist=1.5)

layout

g <- graph(c(2,1,3,1,4,1,5,1,6,1,7,1),directed = F)
par(mfrow=c(2,3))
plot(g,vertex.size=40,layout=layout_on_grid,main="簡單的網格布局")
plot(g,vertex.size=40,layout=layout.auto,main="自動布局")
plot(g,vertex.size=40,layout=layout_as_star,main="星形布局")
plot(g,vertex.size=40,layout=layout.circle,main="環形布局")
plot(g,vertex.size=40,layout=layout_randomly,main="隨機布局")
plot(g,vertex.size=40,layout=layout_as_tree(g),main="樹狀布局")

# {-}

分群

g <- tr <- make_tree(40, children = 3, mode = "undirected")
plot(g)

ceb <- cluster_edge_betweenness(g) # 建構模型
dendPlot(ceb)

plot(ceb,g)

ceb
## IGRAPH clustering edge betweenness, groups: 7, mod: 0.68
## + groups:
##   $`1`
##   [1]  1  4 13 38 39 40
##   
##   $`2`
##   [1]  2  6  7 17 18 19 20 21 22
##   
##   $`3`
##   [1]  3  9 10 26 27 28 29 30 31
##   
##   $`4`
##   + ... omitted several groups/vertices
# 其他分群方法
# cluster_label_prop
# cluster_fast_greedy
# spinglass.community

其他package

networkD3

library(networkD3) 
data(MisLinks)
data(MisNodes)
forceNetwork(Links = MisLinks, Nodes = MisNodes, Source = "source",
             Target = "target", Value = "value", NodeID = "name",
             Group = "group", opacity = 0.4, zoom = TRUE)
MyNodes = data.frame(name = c("Google", "Apple", "Amazon", "Youtube", "Paypal"),
                     group = c("A", "B", "C", "A", "C"),
                     size = c(100, 25, 9, 1, 4))
# 描述圈圈間的聯結,source必定大於target
MyLinks = data.frame(source = c(1, 2, 2, 3),
                     target = c(0, 0, 1, 0),
                     value = c(1, 2, 5, 10)) # value代表線的粗細
forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source",
             Target = "target", Value = "value", NodeID = "name",
             Group = "group", opacity = 1, zoom = TRUE)

qgraph

library(qgraph)

qplot with matrix

  • dataframe要轉成matrix
adj=matrix(sample(0:1,10^2,TRUE,prob=c(0.8,0.2)),nrow=10,ncol=10)
qgraph(adj)
title("Unweighted and directed graphs",line=2.5)

# Save plot to nonsquare pdf file:
# qgraph(adj,filetype='pdf',height=5,width=10)

big5

library("psych")
data(big5)
data(big5groups)

# Correlations:
big5Graph <- qgraph(cor(big5),minimum=0.25,groups=big5groups,
            legend=TRUE,borders=FALSE, title = "Big 5 correlations")

# Same graph with spring layout:
qgraph(big5Graph,layout="spring")

# different color scheme:
# qgraph(big5Graph,posCol="blue",negCol="purple")

Exploratory factor analysis (have problems)

big5efa <- factanal(big5,factors=5,rotation="promax",scores="regression")
qgraph(big5efa,groups=big5groups,layout="circle",minimum=0.2,
       cut=0.4,vsize=c(1.5,10),borders=FALSE,vTrans=200,title="Big 5 EFA")
qgraph.efa(big5, 5, groups = big5groups, rotation = "promax",
           minimum = 0.2, cut = 0.4, vsize = c(1, 15), borders = FALSE,
           asize = 0.07, esize = 4, vTrans = 200)
## Principal component analysis:
big5pca <- principal(cor(big5),5,rotate="promax")
qgraph(big5pca,groups=big5groups,layout="circle",rotation="promax",minimum=0.2,
       cut=0.4,vsize=c(1.5,10),borders=FALSE,vTrans=200,title="Big 5 PCA")

bfi

data(bfi)

# Compute correlations:
CorMat <- cor_auto(bfi[,1:25])
## Variables detected as ordinal: A1; A2; A3; A4; A5; C1; C2; C3; C4; C5; E1; E2; E3; E4; E5; N1; N2; N3; N4; N5; O1; O2; O3; O4; O5
# Compute graph with tuning = 0 (BIC):
BICgraph <- qgraph(CorMat, graph = "glasso", sampleSize = nrow(bfi),
          tuning = 0, layout = "spring", title = "BIC", details = TRUE)
## Warning in EBICglassoCore(S = S, n = n, gamma = gamma, penalize.diagonal
## = penalize.diagonal, : A dense regularized network was selected (lambda <
## 0.1 * lambda.max). Recent work indicates a possible drop in specificity.
## Interpret the presence of the smallest edges with care. Setting threshold =
## TRUE will enforce higher specificity, at the cost of sensitivity.

# Compute graph with tuning = 0.5 (EBIC)
EBICgraph <- qgraph(CorMat, graph = "glasso", sampleSize = nrow(bfi),
          tuning = 0.5, layout = "spring", title = "BIC", details = TRUE)
## Warning in EBICglassoCore(S = S, n = n, gamma = gamma, penalize.diagonal
## = penalize.diagonal, : A dense regularized network was selected (lambda <
## 0.1 * lambda.max). Recent work indicates a possible drop in specificity.
## Interpret the presence of the smallest edges with care. Setting threshold =
## TRUE will enforce higher specificity, at the cost of sensitivity.

centrality and clustering

centralityPlot(list(BIC = BICgraph, EBIC = EBICgraph))
## Note: z-scores are shown on x-axis rather than raw centrality indices.

clusteringPlot(list(BIC = BICgraph, EBIC = EBICgraph))
## Note: z-scores are shown on x-axis rather than raw centrality indices.

centrality_auto(BICgraph)
## $node.centrality
##    Betweenness   Closeness  Strength ExpectedInfluence
## A1           1 0.002716442 1.1992733        0.07940007
## A2          21 0.003053247 1.3587499        0.58200348
## A3          21 0.003090016 1.1067197        0.80763344
## A4          11 0.003001281 1.0386258        0.47713479
## A5          15 0.003169514 1.0816605        0.80328286
## C1           1 0.002780521 1.0664627        0.45471027
## C2          24 0.003270458 1.4859096        0.93158317
## C3           1 0.002764676 0.9766622        0.34190351
## C4          20 0.003140338 1.6280943        0.46617377
## C5          18 0.003199705 1.2710371        0.19734805
## E1           2 0.002993497 1.3240260        0.01502171
## E2          15 0.003322839 1.4523625        0.21331154
## E3          11 0.003086533 1.2399614        0.91152675
## E4          27 0.003389793 1.4695271        0.29832607
## E5          18 0.003377435 1.4622480        0.42041191
## N1          17 0.002604386 1.3456596        1.07989574
## N2           0 0.002444764 1.3195712        0.92947534
## N3          10 0.002758247 1.0946576        1.05587436
## N4          21 0.003085156 1.2692686        0.84728277
## N5          13 0.003115974 1.3093307        0.81756520
## O1           5 0.002925719 1.1055701        0.56271655
## O2           2 0.002856739 1.0634642        0.48765814
## O3          14 0.003088008 1.4193970        0.36307920
## O4          10 0.003330758 1.2380913        0.75518514
## O5           4 0.003066329 1.2971293        0.25049673
## 
## $edge.betweenness.centrality
##     from to edgebetweenness
## 194   N3 N4              23
## 300   A5 E4              23
## 245   A3 A5              22
## 168   A2 A3              21
## 74    C4 C5              18
## 271   A4 C2              16
## 100   C5 N4              15
## 164   E5 N1              15
## 175   N1 N2              15
## 1     A1 A2              14
## 15    C1 C2              14
## 149   E3 O3              14
## 126   E2 E4              13
## 170   E5 O1              12
## 254   A3 E3              12
## 37    C2 C4              11
## 49    C2 N5              11
## 127   E2 E5              11
## 232   A2 E5              11
## 244   A3 A4              11
## 163   E4 O5              10
## 176   N1 N3              10
## 206   N4 O4              10
## 110   E1 E4               9
## 137   E2 O4               9
## 215   O1 O3               9
## 52    C2 O3               8
## 56    C3 C5               8
## 156   E4 N4               8
## 178   N1 N5               8
## 87    C4 O2               7
## 91    C4 O5               7
## 108   E1 E2               7
## 132   E2 N5               7
## 209   N5 O2               7
## 224   A2 C3               7
## 278   A4 E4               7
## 279   A1 C4               7
## 17    C1 C4               6
## 36    C2 C3               6
## 179   A2 A4               6
## 195   N3 N5               6
## 4     A5 N1               5
## 88    C4 O3               5
## 93    C5 E2               5
## 117   E1 N4               5
## 140   E3 E5               5
## 147   E3 O1               5
## 185   N2 N3               5
## 216   O1 O4               5
## 218   O2 O3               5
## 221   O3 O4               5
## 222   O3 O5               5
## 238   A2 N5               5
## 274   A4 C5               5
## 33    C1 O4               4
## 39    C2 E1               4
## 106   C5 O4               4
## 139   E3 E4               4
## 160   E4 O2               4
## 202   N4 N5               4
## 225   O4 O5               4
## 299   A5 E3               4
## 55    C3 C4               3
## 57    A1 N1               3
## 98    C5 N2               3
## 220   O2 O5               3
## 43    C2 E5               2
## 92    C5 E1               2
## 125   E2 E3               2
## 184   N1 O5               2
## 214   O1 O2               2
## 242   A2 O4               2
## 16    C1 C3               1
## 23    C1 E5               1
## 62    C3 E5               1
## 121   E1 O3               1
## 146   A1 O4               1
## 157   A1 O5               1
## 167   E5 N4               1
## 169   E5 N5               1
## 208   N5 O1               1
## 282   A4 N2               1
## 2     A1 E1               0
## 3     A5 E5               0
## 5     A5 N2               0
## 6     A5 N3               0
## 7     A5 N4               0
## 8     A5 N5               0
## 9     A5 O1               0
## 10    A5 O2               0
## 11    A5 O3               0
## 12    A5 O4               0
## 13    A1 E2               0
## 14    A5 O5               0
## 18    C1 C5               0
## 19    C1 E1               0
## 20    C1 E2               0
## 21    C1 E3               0
## 22    C1 E4               0
## 24    A1 E3               0
## 25    C1 N1               0
## 26    C1 N2               0
## 27    C1 N3               0
## 28    C1 N4               0
## 29    C1 N5               0
## 30    C1 O1               0
## 31    C1 O2               0
## 32    C1 O3               0
## 34    C1 O5               0
## 35    A1 E4               0
## 38    C2 C5               0
## 40    C2 E2               0
## 41    C2 E3               0
## 42    C2 E4               0
## 44    C2 N1               0
## 45    C2 N2               0
## 46    A1 E5               0
## 47    C2 N3               0
## 48    C2 N4               0
## 50    C2 O1               0
## 51    C2 O2               0
## 53    C2 O4               0
## 54    C2 O5               0
## 58    C3 E1               0
## 59    C3 E2               0
## 60    C3 E3               0
## 61    C3 E4               0
## 63    C3 N1               0
## 64    C3 N2               0
## 65    C3 N3               0
## 66    C3 N4               0
## 67    C3 N5               0
## 68    A1 N2               0
## 69    C3 O1               0
## 70    C3 O2               0
## 71    C3 O3               0
## 72    C3 O4               0
## 73    C3 O5               0
## 75    C4 E1               0
## 76    C4 E2               0
## 77    C4 E3               0
## 78    C4 E4               0
## 79    A1 N3               0
## 80    C4 E5               0
## 81    C4 N1               0
## 82    C4 N2               0
## 83    C4 N3               0
## 84    C4 N4               0
## 85    C4 N5               0
## 86    C4 O1               0
## 89    C4 O4               0
## 90    A1 N4               0
## 94    C5 E3               0
## 95    C5 E4               0
## 96    C5 E5               0
## 97    C5 N1               0
## 99    C5 N3               0
## 101   A1 N5               0
## 102   C5 N5               0
## 103   C5 O1               0
## 104   C5 O2               0
## 105   C5 O3               0
## 107   C5 O5               0
## 109   E1 E3               0
## 111   E1 E5               0
## 112   A1 A3               0
## 113   A1 O1               0
## 114   E1 N1               0
## 115   E1 N2               0
## 116   E1 N3               0
## 118   E1 N5               0
## 119   E1 O1               0
## 120   E1 O2               0
## 122   E1 O4               0
## 123   E1 O5               0
## 124   A1 O2               0
## 128   E2 N1               0
## 129   E2 N2               0
## 130   E2 N3               0
## 131   E2 N4               0
## 133   E2 O1               0
## 134   E2 O2               0
## 135   A1 O3               0
## 136   E2 O3               0
## 138   E2 O5               0
## 141   E3 N1               0
## 142   E3 N2               0
## 143   E3 N3               0
## 144   E3 N4               0
## 145   E3 N5               0
## 148   E3 O2               0
## 150   E3 O4               0
## 151   E3 O5               0
## 152   E4 E5               0
## 153   E4 N1               0
## 154   E4 N2               0
## 155   E4 N3               0
## 158   E4 N5               0
## 159   E4 O1               0
## 161   E4 O3               0
## 162   E4 O4               0
## 165   E5 N2               0
## 166   E5 N3               0
## 171   E5 O2               0
## 172   E5 O3               0
## 173   E5 O4               0
## 174   E5 O5               0
## 177   N1 N4               0
## 180   N1 O1               0
## 181   N1 O2               0
## 182   N1 O3               0
## 183   N1 O4               0
## 186   N2 N4               0
## 187   N2 N5               0
## 188   N2 O1               0
## 189   N2 O2               0
## 190   A2 A5               0
## 191   N2 O3               0
## 192   N2 O4               0
## 193   N2 O5               0
## 196   N3 O1               0
## 197   N3 O2               0
## 198   N3 O3               0
## 199   N3 O4               0
## 200   N3 O5               0
## 201   A2 C1               0
## 203   N4 O1               0
## 204   N4 O2               0
## 205   N4 O3               0
## 207   N4 O5               0
## 210   N5 O3               0
## 211   N5 O4               0
## 212   A2 C2               0
## 213   N5 O5               0
## 217   O1 O5               0
## 219   O2 O4               0
## 223   A1 A4               0
## 226   A2 C4               0
## 227   A2 C5               0
## 228   A2 E1               0
## 229   A2 E2               0
## 230   A2 E3               0
## 231   A2 E4               0
## 233   A2 N1               0
## 234   A2 N2               0
## 235   A1 A5               0
## 236   A2 N3               0
## 237   A2 N4               0
## 239   A2 O1               0
## 240   A2 O2               0
## 241   A2 O3               0
## 243   A2 O5               0
## 246   A1 C1               0
## 247   A3 C1               0
## 248   A3 C2               0
## 249   A3 C3               0
## 250   A3 C4               0
## 251   A3 C5               0
## 252   A3 E1               0
## 253   A3 E2               0
## 255   A3 E4               0
## 256   A3 E5               0
## 257   A1 C2               0
## 258   A3 N1               0
## 259   A3 N2               0
## 260   A3 N3               0
## 261   A3 N4               0
## 262   A3 N5               0
## 263   A3 O1               0
## 264   A3 O2               0
## 265   A3 O3               0
## 266   A3 O4               0
## 267   A3 O5               0
## 268   A1 C3               0
## 269   A4 A5               0
## 270   A4 C1               0
## 272   A4 C3               0
## 273   A4 C4               0
## 275   A4 E1               0
## 276   A4 E2               0
## 277   A4 E3               0
## 280   A4 E5               0
## 281   A4 N1               0
## 283   A4 N3               0
## 284   A4 N4               0
## 285   A4 N5               0
## 286   A4 O1               0
## 287   A4 O2               0
## 288   A4 O3               0
## 289   A4 O4               0
## 290   A1 C5               0
## 291   A4 O5               0
## 292   A5 C1               0
## 293   A5 C2               0
## 294   A5 C3               0
## 295   A5 C4               0
## 296   A5 C5               0
## 297   A5 E1               0
## 298   A5 E2               0
## 
## $ShortestPathLengths
##           A1        A2        A3        A4        A5        C1        C2
## A1  0.000000  3.570297  7.254209 10.067359 10.905572 17.280307 15.571420
## A2  3.570297  0.000000  3.683913  6.497062  7.335275 16.626393 13.109531
## A3  7.254209  3.683913  0.000000  6.637428  3.651362 16.766759 13.249897
## A4 10.067359  6.497062  6.637428  0.000000 10.288790 10.129331  6.612469
## A5 10.905572  7.335275  3.651362 10.288790  0.000000 20.418121 16.901259
## C1 17.280307 16.626393 16.766759 10.129331 20.418121  0.000000  3.516862
## C2 15.571420 13.109531 13.249897  6.612469 16.901259  3.516862  0.000000
## C3 12.859478  9.289181 12.973094 12.862595 16.624456  8.465580  6.250126
## C4 10.955133 14.525430 17.866184 11.228756 20.098983  6.325174  4.616288
## C5 14.321551 14.930147 15.415330  8.777902 19.066692  9.691592  7.982705
## E1 19.924278 16.353981 12.670069 13.130301  9.018707 16.996005 13.479143
## E2 17.636086 14.065790 11.492810 11.953042  7.841447 16.681291 17.218082
## E3 13.684147 10.113850  6.429937 13.067365  6.139703 19.097089 15.580227
## E4 15.147202 11.576905  7.892993  8.353225  4.241631 18.482556 14.965694
## E5 11.107030  7.536734 11.220646 14.033796 13.338028 14.307232 11.466751
## N1 16.666997 17.203811 18.417355 23.700874 14.765993 22.060544 18.543682
## N2 18.405226 18.942041 20.155585 22.100224 16.504222 23.798774 20.281911
## N3 21.429441 20.816606 22.308272 19.142400 18.656910 19.078857 15.561994
## N4 21.451558 21.150074 19.073781 15.907909 15.422419 16.821600 15.112713
## N5 18.571815 15.001518 18.685431 16.359375 16.147241 13.263768  9.746906
## O1 17.753935 14.183638 12.708491 19.345919 12.418256 16.933389 15.435779
## O2 19.581942 21.210571 17.526658 19.855566 16.242537 14.951984 13.243097
## O3 19.007276 15.436979 11.753066 16.869566 11.462832 13.773960 10.257098
## O4 14.862676 14.046767 17.730679 18.437107 14.325513 10.197226 13.714088
## O5 17.866041 19.778939 16.095026 16.555258 12.443664 13.980493 12.271606
##           C3        C4        C5        E1        E2        E3        E4
## A1 12.859478 10.955133 14.321551 19.924278 17.636086 13.684147 15.147202
## A2  9.289181 14.525430 14.930147 16.353981 14.065790 10.113850 11.576905
## A3 12.973094 17.866184 15.415330 12.670069 11.492810  6.429937  7.892993
## A4 12.862595 11.228756  8.777902 13.130301 11.953042 13.067365  8.353225
## A5 16.624456 20.098983 19.066692  9.018707  7.841447  6.139703  4.241631
## C1  8.465580  6.325174  9.691592 16.996005 16.681291 19.097089 18.482556
## C2  6.250126  4.616288  7.982705 13.479143 17.218082 15.580227 14.965694
## C3  0.000000  8.050522  5.640965 19.729269 18.412432 19.403031 20.866087
## C4  8.050522  0.000000  3.366418 18.067297 16.137885 17.562143 15.857352
## C5  5.640965  3.366418  0.000000 14.700879 12.771467 20.928561 16.371284
## E1 19.729269 18.067297 14.700879  0.000000  3.738939 13.066853  4.777076
## E2 18.412432 16.137885 12.771467  3.738939  0.000000 10.844424  3.599817
## E3 19.403031 17.562143 20.928561 13.066853 10.844424  0.000000  8.289777
## E4 20.866087 15.857352 16.371284  4.777076  3.599817  8.289777  0.000000
## E5 15.606836 16.083038 19.300523 10.267995  6.529056  7.198326 10.128873
## N1 20.767908 18.493360 15.126942 18.539517 16.196134 16.865404 19.007623
## N2 20.945606 18.671059 15.304641 18.740612 17.934363 18.603633 19.378818
## N3 16.005463 13.730916 10.364498 13.777073 14.120882 21.627848 14.415279
## N4 12.770973 10.496425  7.130008 10.542582 14.281521 19.470566 11.180789
## N5 15.997033 14.363194 13.278563 12.044732  8.305794 19.150218 11.905610
## O1 21.685906 17.417695 17.174215 16.914899 13.175960  6.278554 14.568331
## O2 16.677332  8.626810 11.993228 16.777982 15.600723 11.096721 12.000906
## O3 16.507224 12.239014 15.605432 16.555443 13.580246  5.323129 13.334892
## O4 16.079017 13.804470 10.438052 10.223004  6.484065 12.419309 10.083882
## O5 15.705841  7.655319 11.021737 12.979110 11.801850 10.455987  8.202033
##           E5        N1        N2        N3        N4        N5        O1
## A1 11.107030 16.666997 18.405226 21.429441 21.451558 18.571815 17.753935
## A2  7.536734 17.203811 18.942041 20.816606 21.150074 15.001518 14.183638
## A3 11.220646 18.417355 20.155585 22.308272 19.073781 18.685431 12.708491
## A4 14.033796 23.700874 22.100224 19.142400 15.907909 16.359375 19.345919
## A5 13.338028 14.765993 16.504222 18.656910 15.422419 16.147241 12.418256
## C1 14.307232 22.060544 23.798774 19.078857 16.821600 13.263768 16.933389
## C2 11.466751 18.543682 20.281911 15.561994 15.112713  9.746906 15.435779
## C3 15.606836 20.767908 20.945606 16.005463 12.770973 15.997033 21.685906
## C4 16.083038 18.493360 18.671059 13.730916 10.496425 14.363194 17.417695
## C5 19.300523 15.126942 15.304641 10.364498  7.130008 13.278563 17.174215
## E1 10.267995 18.539517 18.740612 13.777073 10.542582 12.044732 16.914899
## E2  6.529056 16.196134 17.934363 14.120882 14.281521  8.305794 13.175960
## E3  7.198326 16.865404 18.603633 21.627848 19.470566 19.150218  6.278554
## E4 10.128873 19.007623 19.378818 14.415279 11.180789 11.905610 14.568331
## E5  0.000000  9.667078 11.405308 14.429522 13.803300 12.985795  6.646904
## N1  9.667078  0.000000  1.738230  4.762444  7.996935  8.796775 16.313982
## N2 11.405308  1.738230  0.000000  4.963539  8.198029 10.535005 18.052212
## N3 14.429522  4.762444  4.963539  0.000000  3.234490  5.815088 18.841527
## N4 13.803300  7.996935  8.198029  3.234490  0.000000  6.148556 15.607036
## N5 12.985795  8.796775 10.535005  5.815088  6.148556  0.000000 16.969613
## O1  6.646904 16.313982 18.052212 18.841527 15.607036 16.969613  0.000000
## O2 17.079195 18.942574 20.680804 15.960887 16.294355 10.145799 10.432291
## O3 11.825586 21.492664 23.230893 19.201544 15.967054 15.919391  5.178682
## O4 13.013121 16.867808 17.068903 12.105364  8.870873 14.789859  6.736163
## O5 16.958444 19.818976 21.557206 19.478038 16.243548 15.059860 10.311540
##           O2        O3        O4        O5
## A1 19.581942 19.007276 14.862676 17.866041
## A2 21.210571 15.436979 14.046767 19.778939
## A3 17.526658 11.753066 17.730679 16.095026
## A4 19.855566 16.869566 18.437107 16.555258
## A5 16.242537 11.462832 14.325513 12.443664
## C1 14.951984 13.773960 10.197226 13.980493
## C2 13.243097 10.257098 13.714088 12.271606
## C3 16.677332 16.507224 16.079017 15.705841
## C4  8.626810 12.239014 13.804470  7.655319
## C5 11.993228 15.605432 10.438052 11.021737
## E1 16.777982 16.555443 10.223004 12.979110
## E2 15.600723 13.580246  6.484065 11.801850
## E3 11.096721  5.323129 12.419309 10.455987
## E4 12.000906 13.334892 10.083882  8.202033
## E5 17.079195 11.825586 13.013121 16.958444
## N1 18.942574 21.492664 16.867808 19.818976
## N2 20.680804 23.230893 17.068903 21.557206
## N3 15.960887 19.201544 12.105364 19.478038
## N4 16.294355 15.967054  8.870873 16.243548
## N5 10.145799 15.919391 14.789859 15.059860
## O1 10.432291  5.178682  6.736163 10.311540
## O2  0.000000  5.773592 12.286735  4.914061
## O3  5.773592  0.000000  7.096180  5.132858
## O4 12.286735  7.096180  0.000000  7.372674
## O5  4.914061  5.132858  7.372674  0.000000
## 
## attr(,"class")
## [1] "list"            "centrality_auto"
clustcoef_auto(BICgraph)

case study

Higgs Twitter Dataset : Friends/follower

Data Link

dta <- read.table("higgs-social_network.edgelist",header = F)
dta <- dta-1

tmp1 <- dta[dta$V1<200,]
tmp2 <- dta[dta$V2<200,]
dta <- merge(tmp1, tmp2)
head(dta)
g <- graph.data.frame(dta)
plot(g, layout=layout_with_gem,
     vertex.size=degree(g), vertex.label=NA, edge.arrow.size=0.1,)

colnames(dta) <- c("source","target")
node <- data.frame(name = as.numeric(V(g)),
                   group = "0",
                   size = degree(g)*2)
forceNetwork(Links = dta, Nodes = node, Source = "source", Group = "group",
             Target = "target", NodeID = "name",Nodesize = "size", 
             opacity = 1, zoom = TRUE,legend = TRUE, fontSize = 20,  
             bounded = FALSE, arrows = TRUE, charge = -120, 
             opacityNoHover = 0.6, linkDistance = 150)

Youtube Video Social Graph in 2007,2008

Data Link

dta <- read.table('youtube_080727/0.txt',header=F, sep="\t", fill=TRUE)
colnames(dta) <- c("videoID", "uploader", "age", "category", "length", "views", "rate", "ratings", "comments", c(1:20))
head(dta)
normalize <- function(x){(x-min(x))/(max(x)-min(x))}
size = normalize(dta$views)+
        normalize(dta$ratings)+
        normalize(dta$comments)
node <- data.frame(name = dta$videoID,
                   group = dta$category,
                   size = size)
library(tidyverse)
## ─ Attaching packages ───────────────────────────── tidyverse 1.2.1 ─
## ✓ ggplot2 3.3.0     ✓ purrr   0.3.4
## ✓ tibble  3.0.1     ✓ dplyr   0.8.5
## ✓ tidyr   1.0.3     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## ─ Conflicts ─────────────────────────────── tidyverse_conflicts() ─
## x ggplot2::%+%()         masks psych::%+%()
## x ggplot2::alpha()       masks psych::alpha()
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x purrr::compose()       masks igraph::compose()
## x tidyr::crossing()      masks igraph::crossing()
## x dplyr::filter()        masks stats::filter()
## x dplyr::groups()        masks igraph::groups()
## x dplyr::lag()           masks stats::lag()
## x purrr::simplify()      masks igraph::simplify()
link <- dta %>% 
  select(videoID,'1':'20') %>% 
  gather(value, target, -videoID) %>% 
  rename(source = 1) %>%
  mutate(value = as.character(50/as.integer(value)),
         target = as.character(target),
         source = as.character(source))
## Warning: attributes are not identical across measure variables;
## they will be dropped
# 所有種類
levels.all <- c(as.character(link[,1]),as.character(link[,3]))
levels.all <- levels(as.factor(levels.all))
# 將商品數字化
for (i in c(1,3)) {
  link[,i] = as.integer(factor(link[,i], levels = levels.all)) - 1
}
node <- node %>% 
  mutate(name = as.character(as.integer(factor(name, levels = levels.all)) - 1),
         group = as.character(group),
         size = as.character(as.integer(size)*100))
onode <- data.frame(name = link$target,
                    group = 'None',
                    size = 5)
node <- rbind(node,onode)
head(link)
head(node)
forceNetwork(Links = link, Nodes = node, Source = "source",
             Target = "target", Value = "value", NodeID = "name",
             Group = "group", Nodesize = "size", opacity = 1, zoom = TRUE,
             legend = TRUE, fontSize = 20,  bounded = FALSE, arrows = TRUE,
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

商場購物資料集

dat <- read.csv("Rules.csv")
head(dat)

產生Nodes

# 所有商品
all.items = c(as.character(dat[,1]), as.character(dat[,2]), as.character(dat[,3]), as.character(dat[,4]), as.character(dat[,5]), as.character(dat[,6]))
# 所有種類
levels.all = levels(as.factor(all.items))
# 去掉 ""
levels.all = levels.all[-1]
# 建立架構
MyNodes = data.frame(matrix(NA, ncol = 3, nrow = length(levels.all)))
# 命名
colnames(MyNodes) = c("name", "group", "size")
MyNodes[,1] = levels.all
# output的商品與類別
out.items = as.character(dat[,6])
levels.out = levels(dat[,6])
# 依照分類填入group
MyNodes[levels.all %in% levels.out,2] = "Output"
MyNodes[!levels.all %in% levels.out,2] = "Input"
# size都設100
MyNodes[,3] = c(rep(5,5),rep(10,10),rep(100,5))
head(MyNodes)

畫圖

forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source",
             Target = "target", Value = "value", NodeID = "name",
             Group = "group", opacity = 1, zoom = TRUE, Nodesize = "size",
             # colourScale = JS("d3.scale.category10()"),
             legend = TRUE, fontSize = 20,  bounded = FALSE,
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

NBA

library(scales)
## Warning: package 'scales' was built under R version 3.6.2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
nba = read.csv("data11_2.csv", header = TRUE, row.names = 1)
head(nba)

看相關

library(corrplot)
## corrplot 0.84 loaded
library(RColorBrewer)
cormatrix.nba = cor(nba)
corrplot(cormatrix.nba, type="upper", order="hclust", tl.col="black", tl.srt=45)

# library(heatmaply)
# breaks = seq(-0.8, 0.8, by = 0.05)
# palette = colorRampPalette(c("navy","white","firebrick3"))(length(breaks))
# colorFunc = col_bin(palette, bins = rescale(breaks))
# heatmaply(cormatrix.nba, k_row = 3, k_col = 3)

igraph作法

cormatrix.nba[cormatrix.nba < 0.4] = 0
cormatrix.nba[cormatrix.nba == 1] = 0
g <- graph_from_adjacency_matrix(cormatrix.nba, weighted=TRUE, add.rownames="row",add.colnames="col")
plot(g, 
     layout=layout.fruchterman.reingold,
     edge.arrow.size=0.2,
     vertex.color='white',
     vertex.size=15)

networkD3作法

MyNodes <- data.frame(name = rownames(cormatrix.nba),
                      group = c(1, 2, 2, 2, 2, 1, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 1, 2, 1))
cormatrix.nba.t <- cormatrix.nba
rownames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1
colnames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1

library(reshape2)
## Warning: package 'reshape2' was built under R version 3.6.2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
MyLinks <- melt(cormatrix.nba.t)
colnames(MyLinks) = c("source", "target", "value")
MyLinks <- MyLinks[MyLinks$value != 0,]
MyLinks <- MyLinks[MyLinks$value != 1,]

forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source",
             Target = "target", Value = "value", NodeID = "name",
             Group = "group",opacity = 1, zoom = TRUE,
             # colourScale = JS("d3.scale.category10()"),
             legend = TRUE, fontSize = 20,  bounded = FALSE,
             charge = -120, opacityNoHover = 0.6, linkDistance = 150)

Network Visualization by igraph, by Janpu Hou