library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Build an undirected graph on three vertices whose edge vector is read in
# pairs (1-2, 2-3, 3-1), then draw it.
# make_graph() is the current name of graph(); FALSE is spelled out because
# the shorthand F is an ordinary variable that can be reassigned.
g1 <- make_graph(edges = c(1, 2, 2, 3, 3, 1), n = 3, directed = FALSE)
plot(g1)
# Build a graph from vertex names; the name vector is read in (from, to)
# pairs, so "Jim" -> "Jack" appears twice and "John" -> "John" is a self-loop.
# `isolates` adds vertices that take part in no edge.
# make_graph() is the current name of graph().
g2 <- make_graph(
  c("John", "Jim", "Jim", "Jack", "Jim", "Jack", "John", "John"),
  isolates = c("Jesse", "Janis", "Jennifer", "Justin")
)
plot(g2)
访问顶点和边
# Print the edge sequence of g2.
E(g2)
## + 4/4 edges from b54c4a6 (vertex names):
## [1] John->Jim Jim ->Jack Jim ->Jack John->John
# Print the vertex sequence of g2 (the isolated vertices are listed too).
V(g2)
## + 7/7 vertices, named, from b54c4a6:
## [1] John Jim Jack Jesse Janis Jennifer Justin
查看网络矩阵
# Empty-bracket indexing prints the graph as a sparse matrix; per the recorded
# output, a cell holds the number of edges from the row vertex to the column
# vertex.
g2[]
## 7 x 7 sparse Matrix of class "dgCMatrix"
## John Jim Jack Jesse Janis Jennifer Justin
## John 1 1 . . . . .
## Jim . . 2 . . . .
## Jack . . . . . . .
## Jesse . . . . . . .
## Janis . . . . . . .
## Jennifer . . . . . . .
## Justin . . . . . . .
向网络、顶点或边添加属性:
# Vertex names are stored in the automatically created "name" attribute.
V(g2)$name
## [1] "John" "Jim" "Jack" "Jesse" "Janis" "Jennifer" "Justin"
# Attach one gender value per vertex, in vertex order.
g2 <- set_vertex_attr(
  g2, "gender",
  value = c("male", "male", "male", "male", "female", "female", "male")
)
# A single value is recycled across all edges: every edge gets type "email"
# and weight 10.
g2 <- set_edge_attr(g2, "type", value = "email")
g2 <- set_edge_attr(g2, "weight", value = 10)
检查属性
# List all edge attributes of g2 as a named list (one vector per attribute).
edge_attr(g2)
## $type
## [1] "email" "email" "email" "email"
##
## $weight
## [1] 10 10 10 10
# List all vertex attributes of g2.
vertex_attr(g2)
## $name
## [1] "John" "Jim" "Jack" "Jesse" "Janis" "Jennifer" "Justin"
##
## $gender
## [1] "male" "male" "male" "male" "female" "female" "male"
# List graph-level attributes; none have been set at this point.
graph_attr(g2)
## named list()
设置属性的另一种方法(类似地,您可以使用 set_edge_attr()、set_vertex_attr() 等)
# Store two graph-level attributes on g2, then list the attribute names.
graph_attr(g2, "name") <- "Email Network"
graph_attr(g2, "something") <- "A thing"
graph_attr_names(g2)
## [1] "name" "something"
简化网络图
# Merge parallel edges but keep self-loops. When edges are merged their
# weights are summed and the "type" attribute is dropped.
# TRUE/FALSE are spelled out because T/F are reassignable variables.
g2s <- simplify(g2,
  remove.multiple = TRUE, remove.loops = FALSE,
  edge.attr.comb = c(weight = "sum", type = "ignore")
)
plot(g2s, vertex.label.dist = 1.5)
空图
# Empty graph on 40 vertices, drawn without labels.
eg <- make_empty_graph(n = 40)
plot(eg, vertex.label = NA, vertex.size = 10)
全图
# Full graph on 40 vertices, drawn without labels.
fg <- make_full_graph(n = 40)
plot(fg, vertex.label = NA, vertex.size = 10)
星图
# Star graph on 40 vertices, drawn without labels.
st <- make_star(n = 40)
plot(st, vertex.label = NA, vertex.size = 10)
树状图
# Undirected tree on 40 vertices with three children per vertex.
tr <- make_tree(n = 40, children = 3, mode = "undirected")
plot(tr, vertex.label = NA, vertex.size = 10)
环型图
# Ring graph on 40 vertices, drawn without labels.
rn <- make_ring(n = 40)
plot(rn, vertex.label = NA, vertex.size = 10)
Erdos-Renyi 随机图模型(n 为节点数,m 为边数)。
# Random graph with 100 vertices and 40 edges (no seed is set, so the result
# differs between runs).
er <- sample_gnm(
  n = 100,
  m = 40
)
plot(er, vertex.label = NA, vertex.size = 6)
Watts-Strogatz 小世界模型:先创建一个格子(lattice,共 dim 个维度,每个维度上有 size 个节点),然后以概率 p 随机重连边。边所连接的邻域范围由 nei 指定。您还可以允许自环(loops)和多重边(multiple)。
# Small-world graph: 2-dimensional lattice with 10 vertices per dimension,
# neighbourhood 1, rewiring probability 0.1. Drawn on a circle.
sw <- sample_smallworld(
  dim = 2,
  size = 10,
  nei = 1,
  p = 0.1
)
plot(sw, layout = layout_in_circle, vertex.label = NA, vertex.size = 6)
无标度图的 Barabasi-Albert 优先连接模型(n 为节点数;power 为优先连接的幂次,1 表示线性;m 为每个时间步添加的边数)
# Undirected preferential-attachment graph: 100 vertices, linear attachment
# (power = 1), one edge added per step.
# FALSE is spelled out because F is a reassignable variable.
ba <- sample_pa(n = 100, power = 1, m = 1, directed = FALSE)
plot(ba, vertex.size = 6, vertex.label = NA)
需要处理的数据集有两个文件
# Read the node table of dataset 1. The path is absolute and specific to the
# machine this transcript was produced on.
nodes <- readr::read_csv(
  file = "/Users/milin/Downloads/R-igraph-Network-Workshop-NetSciX-master/Dataset1-Media-Example-NODES.csv"
)
## Rows: 17 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): id, media, type.label
## dbl (2): media.type, audience.size
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the edge table of dataset 1. The path is absolute and specific to the
# machine this transcript was produced on.
links <- readr::read_csv(
  file = "/Users/milin/Downloads/R-igraph-Network-Workshop-NetSciX-master/Dataset1-Media-Example-EDGES.csv"
)
## Rows: 52 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): from, to, type
## dbl (1): weight
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
然后检查数据
# Preview the first rows of the node table.
head(nodes)
## # A tibble: 6 × 5
## id media media.type type.label audience.size
## <chr> <chr> <dbl> <chr> <dbl>
## 1 s01 NY Times 1 Newspaper 20
## 2 s02 Washington Post 1 Newspaper 25
## 3 s03 Wall Street Journal 1 Newspaper 30
## 4 s04 USA Today 1 Newspaper 32
## 5 s05 LA Times 1 Newspaper 20
## 6 s06 New York Post 1 Newspaper 50
# Preview the first rows of the edge table.
head(links)
## # A tibble: 6 × 4
## from to weight type
## <chr> <chr> <dbl> <chr>
## 1 s01 s02 10 hyperlink
## 2 s01 s02 12 hyperlink
## 3 s01 s03 22 hyperlink
## 4 s01 s04 21 hyperlink
## 5 s04 s11 22 mention
## 6 s05 s15 21 mention
# Node table: number of rows, then number of distinct ids.
nrow(nodes)
sum(!duplicated(nodes$id))
## [1] 17
## [1] 17
# Edge table: number of rows, then number of distinct (from, to) pairs.
nrow(links)
sum(!duplicated(links[, c("from", "to")]))
## [1] 52
## [1] 49
需要注意的是, 数据中存在重复的链接(52 行中只有 49 个不同的 from-to 组合), 这意味着同一对节点之间可能存在多条边. 下一步对这些链接进行聚合.
# Collapse duplicate links: sum `weight` over rows that share the same
# from, to and type. Columns are selected by name rather than by position, so
# the step does not depend on the column order of the input, and the summed
# column is already called "weight" (no positional rename is needed).
links <- aggregate(
  links["weight"],
  by = links[c("from", "to", "type")],
  FUN = sum
)
# Sort by source then target and reset the row names to 1..n.
links <- links[order(links$from, links$to), ]
rownames(links) <- NULL
第一步还是读取数据集.
# Read the edge file of dataset 2. The path is absolute and specific to the
# machine this transcript was produced on.
links2 <- readr::read_csv(
  file = "/Users/milin/Downloads/R-igraph-Network-Workshop-NetSciX-master/Dataset2-Media-User-Example-EDGES.csv"
)
## New names:
## Rows: 10 Columns: 21
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 dbl (20): U01, U02, U03, U04, U05, U06, U07, U08, U09, U10, U11, U12,
## U13, U...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Read the node table of dataset 2. The path is absolute and specific to the
# machine this transcript was produced on.
nodes2 <- readr::read_csv(
  file = "/Users/milin/Downloads/R-igraph-Network-Workshop-NetSciX-master/Dataset2-Media-User-Example-NODES.csv"
)
## Rows: 30 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): id, media, media.name
## dbl (2): media.type, audience.size
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
查看数据
# Preview the first rows of the dataset 2 node table.
head(nodes2)
## # A tibble: 6 × 5
## id media media.type media.name audience.size
## <chr> <chr> <dbl> <chr> <dbl>
## 1 s01 NYT 1 Newspaper 20
## 2 s02 WaPo 1 Newspaper 25
## 3 s03 WSJ 1 Newspaper 30
## 4 s04 USAT 1 Newspaper 32
## 5 s05 LATimes 1 Newspaper 20
## 6 s06 CNN 2 TV 56
# Preview the first rows of the dataset 2 edge file; the recorded output shows
# an unnamed id column (`...1`) followed by one 0/1 column per user.
head(links2)
## # A tibble: 6 × 21
## ...1 U01 U02 U03 U04 U05 U06 U07 U08 U09 U10 U11 U12
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 s01 1 1 1 0 0 0 0 0 0 0 0 0
## 2 s02 0 0 0 1 1 0 0 0 0 0 0 0
## 3 s03 0 0 0 0 0 1 1 1 1 0 0 0
## 4 s04 0 0 0 0 0 0 0 0 1 1 1 0
## 5 s05 0 0 0 0 0 0 0 0 0 0 1 1
## 6 s06 0 0 0 0 0 0 0 0 0 0 0 0
## # ℹ 8 more variables: U13 <dbl>, U14 <dbl>, U15 <dbl>, U16 <dbl>, U17 <dbl>,
## # U18 <dbl>, U19 <dbl>, U20 <dbl>
links2 是一个关联矩阵(incidence matrix):行是媒体(s01、s02……),列是用户(U01–U20),取值 0/1 表示二者之间是否有连接
# links2 was read with the media ids in its first, character column (`...1`).
# Converting the whole table with as.matrix() would coerce every cell to
# character, which later triggers "NAs introduced by coercion". Move the ids
# into the row names and convert only the 0/1 user columns, so the result is
# a numeric matrix.
links2_ids <- links2[[1]]
links2 <- as.matrix(links2[, -1])
rownames(links2) <- links2_ids
dim(links2)
## [1] 10 20
dim(nodes2)
## [1] 30 5
我们首先将原始数据转换为 igraph 网络对象。这里我们使用 igraph 的 graph_from_data_frame() 函数,它接受两个数据框:d(边列表)和 vertices(节点及其属性)。
library(igraph)
# Build a directed igraph object from the edge table `links` and the node
# table `nodes`. TRUE is spelled out because T is a reassignable variable.
net <- graph_from_data_frame(d = links, vertices = nodes, directed = TRUE)
class(net)
## [1] "igraph"
# Printing the object shows a summary header, the attribute list and the edges.
net
## IGRAPH 376696a DNW- 17 49 --
## + attr: name (v/c), media (v/c), media.type (v/n), type.label (v/c),
## | audience.size (v/n), type (e/c), weight (e/n)
## + edges from 376696a (vertex names):
## [1] s01->s02 s01->s03 s01->s04 s01->s15 s02->s01 s02->s03 s02->s09 s02->s10
## [9] s03->s01 s03->s04 s03->s05 s03->s08 s03->s10 s03->s11 s03->s12 s04->s03
## [17] s04->s06 s04->s11 s04->s12 s04->s17 s05->s01 s05->s02 s05->s09 s05->s15
## [25] s06->s06 s06->s16 s06->s17 s07->s03 s07->s08 s07->s10 s07->s14 s08->s03
## [33] s08->s07 s08->s09 s09->s10 s10->s03 s12->s06 s12->s13 s12->s14 s13->s12
## [41] s13->s17 s14->s11 s14->s13 s15->s01 s15->s04 s15->s06 s16->s06 s16->s17
## [49] s17->s04
我们还可以通过以下方式轻松访问节点、边及其属性:
E(net) # The edges of the "net" object
## + 49/49 edges from 376696a (vertex names):
## [1] s01->s02 s01->s03 s01->s04 s01->s15 s02->s01 s02->s03 s02->s09 s02->s10
## [9] s03->s01 s03->s04 s03->s05 s03->s08 s03->s10 s03->s11 s03->s12 s04->s03
## [17] s04->s06 s04->s11 s04->s12 s04->s17 s05->s01 s05->s02 s05->s09 s05->s15
## [25] s06->s06 s06->s16 s06->s17 s07->s03 s07->s08 s07->s10 s07->s14 s08->s03
## [33] s08->s07 s08->s09 s09->s10 s10->s03 s12->s06 s12->s13 s12->s14 s13->s12
## [41] s13->s17 s14->s11 s14->s13 s15->s01 s15->s04 s15->s06 s16->s06 s16->s17
## [49] s17->s04
V(net) # The vertices of the "net" object
## + 17/17 vertices, named, from 376696a:
## [1] s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
E(net)$type # Edge attribute "type"
## [1] "hyperlink" "hyperlink" "hyperlink" "mention" "hyperlink" "hyperlink"
## [7] "hyperlink" "hyperlink" "hyperlink" "hyperlink" "hyperlink" "hyperlink"
## [13] "mention" "hyperlink" "hyperlink" "hyperlink" "mention" "mention"
## [19] "hyperlink" "mention" "mention" "hyperlink" "hyperlink" "mention"
## [25] "hyperlink" "hyperlink" "mention" "mention" "mention" "hyperlink"
## [31] "mention" "hyperlink" "mention" "mention" "mention" "hyperlink"
## [37] "mention" "hyperlink" "mention" "hyperlink" "mention" "mention"
## [43] "mention" "hyperlink" "hyperlink" "hyperlink" "hyperlink" "mention"
## [49] "hyperlink"
V(net)$media # Vertex attribute "media"
## [1] "NY Times" "Washington Post" "Wall Street Journal"
## [4] "USA Today" "LA Times" "New York Post"
## [7] "CNN" "MSNBC" "FOX News"
## [10] "ABC" "BBC" "Yahoo News"
## [13] "Google News" "Reuters.com" "NYTimes.com"
## [16] "WashingtonPost.com" "AOL.com"
现在我们有了图形网络对象,让我们首先尝试绘制它。
# First plot of the network: small arrow heads, no vertex labels.
plot(
  net,
  vertex.label = NA,
  edge.arrow.size = 0.4
)
这看起来不太好。让我们先移除图中的自环(loops)来开始修复。
# Remove self-loops only; parallel edges are kept.
# TRUE/FALSE are spelled out because T/F are reassignable variables.
net <- simplify(net, remove.multiple = FALSE, remove.loops = TRUE)
# simplify(net, edge.attr.comb=list(weight="sum","ignore")) would also merge
# multiple edges and sum their weights.
如果需要,可以从图形网络中提取边列表或矩阵。
# Extract the edge list as a two-column matrix of vertex names.
# TRUE is spelled out because T is a reassignable variable.
as_edgelist(net, names = TRUE)
## [,1] [,2]
## [1,] "s01" "s02"
## [2,] "s01" "s03"
## [3,] "s01" "s04"
## [4,] "s01" "s15"
## [5,] "s02" "s01"
## [6,] "s02" "s03"
## [7,] "s02" "s09"
## [8,] "s02" "s10"
## [9,] "s03" "s01"
## [10,] "s03" "s04"
## [11,] "s03" "s05"
## [12,] "s03" "s08"
## [13,] "s03" "s10"
## [14,] "s03" "s11"
## [15,] "s03" "s12"
## [16,] "s04" "s03"
## [17,] "s04" "s06"
## [18,] "s04" "s11"
## [19,] "s04" "s12"
## [20,] "s04" "s17"
## [21,] "s05" "s01"
## [22,] "s05" "s02"
## [23,] "s05" "s09"
## [24,] "s05" "s15"
## [25,] "s06" "s16"
## [26,] "s06" "s17"
## [27,] "s07" "s03"
## [28,] "s07" "s08"
## [29,] "s07" "s10"
## [30,] "s07" "s14"
## [31,] "s08" "s03"
## [32,] "s08" "s07"
## [33,] "s08" "s09"
## [34,] "s09" "s10"
## [35,] "s10" "s03"
## [36,] "s12" "s06"
## [37,] "s12" "s13"
## [38,] "s12" "s14"
## [39,] "s13" "s12"
## [40,] "s13" "s17"
## [41,] "s14" "s11"
## [42,] "s14" "s13"
## [43,] "s15" "s01"
## [44,] "s15" "s04"
## [45,] "s15" "s06"
## [46,] "s16" "s06"
## [47,] "s16" "s17"
## [48,] "s17" "s04"
# Extract the adjacency matrix, filling each cell with the edge's "weight"
# attribute.
as_adjacency_matrix(net, attr="weight")
## 17 x 17 sparse Matrix of class "dgCMatrix"
## [[ suppressing 17 column names 's01', 's02', 's03' ... ]]
##
## s01 . 22 22 21 . . . . . . . . . . 20 . .
## s02 23 . 21 . . . . . 1 5 . . . . . . .
## s03 21 . . 22 1 . . 4 . 2 1 1 . . . . .
## s04 . . 23 . . 1 . . . . 22 3 . . . . 2
## s05 1 21 . . . . . . 2 . . . . . 21 . .
## s06 . . . . . . . . . . . . . . . 21 21
## s07 . . 1 . . . . 22 . 21 . . . 4 . . .
## s08 . . 2 . . . 21 . 23 . . . . . . . .
## s09 . . . . . . . . . 21 . . . . . . .
## s10 . . 2 . . . . . . . . . . . . . .
## s11 . . . . . . . . . . . . . . . . .
## s12 . . . . . 2 . . . . . . 22 22 . . .
## s13 . . . . . . . . . . . 21 . . . . 1
## s14 . . . . . . . . . . 1 . 21 . . . .
## s15 22 . . 1 . 4 . . . . . . . . . . .
## s16 . . . . . 23 . . . . . . . . . . 21
## s17 . . . 4 . . . . . . . . . . . . .
或描述节点和边缘的数据框:
# Edge data frame: from, to and the edge attributes. The igraph:: prefix
# makes explicit which package's as_data_frame() is called.
igraph::as_data_frame(net, what = "edges")
## from to type weight
## 1 s01 s02 hyperlink 22
## 2 s01 s03 hyperlink 22
## 3 s01 s04 hyperlink 21
## 4 s01 s15 mention 20
## 5 s02 s01 hyperlink 23
## 6 s02 s03 hyperlink 21
## 7 s02 s09 hyperlink 1
## 8 s02 s10 hyperlink 5
## 9 s03 s01 hyperlink 21
## 10 s03 s04 hyperlink 22
## 11 s03 s05 hyperlink 1
## 12 s03 s08 hyperlink 4
## 13 s03 s10 mention 2
## 14 s03 s11 hyperlink 1
## 15 s03 s12 hyperlink 1
## 16 s04 s03 hyperlink 23
## 17 s04 s06 mention 1
## 18 s04 s11 mention 22
## 19 s04 s12 hyperlink 3
## 20 s04 s17 mention 2
## 21 s05 s01 mention 1
## 22 s05 s02 hyperlink 21
## 23 s05 s09 hyperlink 2
## 24 s05 s15 mention 21
## 25 s06 s16 hyperlink 21
## 26 s06 s17 mention 21
## 27 s07 s03 mention 1
## 28 s07 s08 mention 22
## 29 s07 s10 hyperlink 21
## 30 s07 s14 mention 4
## 31 s08 s03 hyperlink 2
## 32 s08 s07 mention 21
## 33 s08 s09 mention 23
## 34 s09 s10 mention 21
## 35 s10 s03 hyperlink 2
## 36 s12 s06 mention 2
## 37 s12 s13 hyperlink 22
## 38 s12 s14 mention 22
## 39 s13 s12 hyperlink 21
## 40 s13 s17 mention 1
## 41 s14 s11 mention 1
## 42 s14 s13 mention 21
## 43 s15 s01 hyperlink 22
## 44 s15 s04 hyperlink 1
## 45 s15 s06 hyperlink 4
## 46 s16 s06 hyperlink 23
## 47 s16 s17 mention 21
## 48 s17 s04 hyperlink 4
# Vertex data frame: one row per vertex with its attributes. The igraph::
# prefix makes explicit which package's as_data_frame() is called.
igraph::as_data_frame(net, what = "vertices")
## name media media.type type.label audience.size
## s01 s01 NY Times 1 Newspaper 20
## s02 s02 Washington Post 1 Newspaper 25
## s03 s03 Wall Street Journal 1 Newspaper 30
## s04 s04 USA Today 1 Newspaper 32
## s05 s05 LA Times 1 Newspaper 20
## s06 s06 New York Post 1 Newspaper 50
## s07 s07 CNN 2 TV 56
## s08 s08 MSNBC 2 TV 34
## s09 s09 FOX News 2 TV 60
## s10 s10 ABC 2 TV 23
## s11 s11 BBC 2 TV 34
## s12 s12 Yahoo News 3 Online 33
## s13 s13 Google News 3 Online 23
## s14 s14 Reuters.com 3 Online 12
## s15 s15 NYTimes.com 3 Online 24
## s16 s16 WashingtonPost.com 3 Online 28
## s17 s17 AOL.com 3 Online 33
# Build a graph from the media-by-user matrix links2 and plot it.
# NOTE(review): the warning recorded below indicates links2 held non-numeric
# values when this ran -- presumably the id column that as.matrix() kept as
# text; confirm that links2 is numeric before relying on net2.
net2 <- graph_from_incidence_matrix(links2)
## Warning in mde(x): NAs introduced by coercion
plot(net2)
常用函数
网络图的绘图元素通常就是节点(vertices)和边(edges)这两个部分, 对应 plot() 中以 vertex. 和 edge. 开头的参数
# Draw the network with media names in place of vertex shapes: bold, small,
# dark-grey labels and light-grey edges.
plot(
  net,
  vertex.shape = "none",
  vertex.label = V(net)$media,
  vertex.label.font = 2,
  vertex.label.color = "gray40",
  vertex.label.cex = 0.7,
  edge.color = "gray85"
)
网络布局是返回网络中每个节点坐标的简单算法。
随机图
# An 80-vertex preferential-attachment graph used to demonstrate layouts.
# Plot settings are stored as attributes on the graph itself, so later
# plot() calls pick them up without extra arguments.
net.bg <- sample_pa(80)
net.bg <- set_vertex_attr(net.bg, "size", value = 8)
net.bg <- set_vertex_attr(net.bg, "frame.color", value = "white")
net.bg <- set_vertex_attr(net.bg, "color", value = "orange")
net.bg <- set_vertex_attr(net.bg, "label", value = "")
net.bg <- set_edge_attr(net.bg, "arrow.mode", value = 0)
plot(net.bg)
可以在plot 函数中设置布局
# Place the vertices at random positions (computed here rather than by
# passing the layout function to plot()).
plot(net.bg, layout = layout_randomly(net.bg))
或者你可以提前计算顶点坐标:
# Compute the circle coordinates once, then hand the matrix to plot().
l <- layout_in_circle(graph = net.bg)
plot(
  net.bg,
  layout = l
)
其他布局
# GEM force-directed layout. layout_with_gem() is the current name of the
# dotted alias layout.gem().
plot(net.bg, layout = layout_with_gem(net.bg))
LGL 算法适用于大型连通图。在这里,您还可以指定一个 root: 一个将放置在布局中间的节点。
# LGL layout, computed explicitly and passed to plot() as coordinates.
plot(net.bg, layout = layout_with_lgl(net.bg))
让我们看看图中所有可用的布局:
# Collect the names of igraph's layout_* functions; [-1] drops the first match.
layouts <- grep("^layout_", ls("package:igraph"), value = TRUE)[-1]
# Remove layouts that do not apply to our graph.
layouts <- layouts[!grepl("bipartite|merge|norm|sugiyama|tree", layouts)]
# Draw on a 3 x 3 grid with thin margins. par() returns the previous settings,
# which are restored after the loop so later plots are not squeezed into the
# grid.
old_par <- par(mfrow = c(3, 3), mar = c(1, 1, 1, 1))
for (layout in layouts) {
  print(layout)
  # Call the layout function by name to get the coordinates for `net`.
  l <- do.call(layout, list(net))
  plot(net, edge.arrow.mode = 0, layout = l, main = layout)
}
par(old_par)
## [1] "layout_as_star"
## [1] "layout_components"
## [1] "layout_in_circle"
## [1] "layout_nicely"
## [1] "layout_on_grid"
## [1] "layout_on_sphere"
## [1] "layout_randomly"
## [1] "layout_with_dh"
## [1] "layout_with_drl"
## [1] "layout_with_fr"
## [1] "layout_with_gem"
## [1] "layout_with_graphopt"
## [1] "layout_with_kk"
## [1] "layout_with_lgl"
## [1] "layout_with_mds"
# Distribution of link weights: histogram, then mean and standard deviation.
hist(links$weight)
mean(links$weight)
## [1] 12.40816
sd(links$weight)
## [1] 9.905635
可以删除某些边和节点, 保留那些权重高的边
# Keep only the stronger ties: edges whose weight is below the mean link
# weight are removed from a copy of the network.
cut.off <- mean(links$weight)
weak_edges <- E(net)[weight < cut.off]
net.sp <- delete_edges(net, weak_edges)
plot(net.sp)
考虑这个问题的另一种方法是分别绘制两种连接(tie)类型:超链接(hyperlink)和提及(mention)。
# Use one edge width for every edge in the plots below.
E(net)$width <- 1.5
# Colour edges by tie type: the logical test plus 1 indexes the palette, so
# "hyperlink" edges are "slategrey" and all other edges are "dark red".
# layout_in_circle is the current name of the dotted alias layout.circle.
plot(net,
  edge.color = c("dark red", "slategrey")[(E(net)$type == "hyperlink") + 1],
  vertex.color = "gray40", layout = layout_in_circle
)
net.m <- net - E(net)[E(net)$type == "hyperlink"] # another way to delete edges
net.h <- net - E(net)[E(net)$type == "mention"]
# Plot the two link types side by side, then restore the previous graphics
# settings so later plots get a full device again.
old_par <- par(mfrow = c(1, 2))
plot(net.h, vertex.color = "orange", main = "Tie: Hyperlink")
plot(net.m, vertex.color = "lightsteelblue2", main = "Tie: Mention")
par(old_par)
R 和 igraph 允许网络的交互式绘图。如果您想稍微调整一下小型图形的布局,这可能是一个有用的选项。在手动调整布局之后,您可以获取节点的坐标,并将它们用于其他绘图。
# Open an interactive Tk window for `net`, read back the (possibly hand-tuned)
# vertex coordinates, close the window and reuse the coordinates in plot().
# tk_coords() is the current name of the dotted alias tkplot.getcoords();
# TRUE is spelled out because T is a reassignable variable.
tkid <- tkplot(net) # tkid is the id of the tkplot that will open
l <- tk_coords(tkid) # grab the coordinates from tkplot
tk_close(tkid, window.close = TRUE)
plot(net, layout = l)
下面是网络矩阵的一个快速热图
# Dense weighted adjacency matrix of `net`, labelled with media names.
# as_adjacency_matrix() is the current name of the dotted alias
# get.adjacency().
netm <- as_adjacency_matrix(net, attr = "weight", sparse = FALSE)
colnames(netm) <- V(net)$media
rownames(netm) <- V(net)$media
# 100-step colour ramp from gold to dark orange.
palf <- colorRampPalette(c("gold", "dark orange"))
# Columns are drawn in reverse order. The order is derived from the matrix
# instead of a hard-coded 17, so the code works for any number of vertices.
heatmap(netm[, rev(seq_len(ncol(netm)))],
  Rowv = NA, Colv = NA, col = palf(100),
  scale = "none", margins = c(10, 10)
)
网络中所有可能边的现有边的比例。
# Share of possible edges that are present, not counting self-loops.
# FALSE is spelled out because F is a reassignable variable.
edge_density(net, loops = FALSE)
## [1] 0.1764706
# ecount(net)/(vcount(net)*(vcount(net)-1)) #for a directed network
计算公式(有向图、不含自环)是 $\dfrac{|E|}{|V|\,(|V|-1)}$
即 边的数目 / (点的数量 × (点的数量 − 1)),本例为 48 / (17 × 16) ≈ 0.176
有向图中长度为2的循环, 类似于微博的互相关联.
# Reciprocity of the directed network.
reciprocity(net)
## [1] 0.4166667
dyad_census(net) # Mutual, asymmetric, and null node pairs
## $mut
## [1] 10
##
## $asym
## [1] 28
##
## $null
## [1] 98
# Reciprocity by hand: each mutual pair contributes two edges, so the share
# of reciprocated edges is 2 * (mutual pairs) / (number of edges).
mutual_pairs <- dyad_census(net)$mut
2 * mutual_pairs / ecount(net)
## [1] 0.4166667
计算公式就是 : The number of pairs with mutual connections / (edges/2)
其中dyad_census 函数返回几个指标:
mut - The number of pairs with mutual(相互) connections.
asym - The number of pairs with non-mutual connections.
null - The number of pairs with no connection between them.
传递性(transitivity)描述三个点之间的关联(A->B->C), 可以通过全局聚类系数和局部聚类系数来衡量.
全局聚类系数: 三角形个数*3 / 节点连接三元组的个数
局部聚类系数 : 该节点的邻居之间实际存在的边数 / 这些邻居之间可能存在的边数(即邻居节点对的数量)
# Global transitivity (clustering coefficient) of the network.
transitivity(net, type="global") # net is treated as an undirected network
## [1] 0.372549
transitivity(as.undirected(net, mode="collapse")) # same as above
## [1] 0.372549
# Local transitivity: one value per vertex.
transitivity(net, type="local")
## [1] 0.6000000 0.6000000 0.2500000 0.3333333 0.5000000 0.4000000 0.3333333
## [8] 0.3333333 0.3333333 0.5000000 0.3333333 0.3000000 0.3333333 0.1666667
## [15] 0.5000000 1.0000000 0.3333333
triad_census(net) # for directed networks: counts each type of three-vertex subgraph
## [1] 244 241 80 13 11 27 15 22 4 1 8 4 4 3 3 0
geodesic path 表示两个点的最短路径
直径是指连接任意两个节点的所有最短路径中最长路径的长度. 任意两个点都有一条最短路径, 这一组最短路径中最长的那个就是直径
diameter()函数返回直径 , get_diameter() 返回路径的序列
请注意,默认情况下使用边权重,除非设置为 NA。
# Diameter in number of steps: weights = NA switches edge weights off.
# FALSE is spelled out because F is a reassignable variable.
diameter(net, directed = FALSE, weights = NA)
## [1] 4
# Without weights = NA the "weight" edge attribute is used as edge length.
diameter(net, directed = FALSE)
## [1] 28
# Vertex sequence of one longest shortest path (unweighted).
diam <- get_diameter(net, directed = FALSE, weights = NA)
diam
## + 5/17 vertices, named, from 6f62359:
## [1] s02 s01 s04 s06 s16
对路径进行可视化
# Highlight the diameter path: its vertices in gold and its edges in orange,
# everything else in grey.
vcol <- replace(rep("gray40", vcount(net)), diam, "gold")
# E(net, path=diam) finds edges along a path, here 'diam'
diam_edges <- E(net, path = diam)
ecol <- replace(rep("gray80", ecount(net)), diam_edges, "orange")
plot(
  net,
  vertex.color = vcol,
  edge.color = ecol,
  edge.arrow.mode = 0
)
度就是连接的边数量
# Total degree (in + out) of every vertex, used to scale the vertex size.
deg <- degree(net, mode = "all")
plot(net, vertex.size = deg * 3)
# Cumulative degree distribution; TRUE is spelled out because T is a
# reassignable variable.
deg.dist <- degree_distribution(net, cumulative = TRUE, mode = "all")
# Plot 1 - deg.dist against the degrees 0..max(deg).
plot(
  x = 0:max(deg), y = 1 - deg.dist, pch = 19, cex = 1.2, col = "orange",
  xlab = "Degree", ylab = "Cumulative Frequency"
)
这个指标主要用来描述每个点的重要性
degree
用节点的入度来衡量节点的重要性 . 局限性在于没考虑邻居节点的重要性.
# In-degree of every vertex (number of incoming edges).
degree(net, mode="in")
## s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
## 4 2 6 4 1 4 1 2 3 4 3 3 2 2 2 1 4
# centr_degree(net, mode="in", normalized=T)
Closeness 接近度
# Closeness of every vertex, computed with mode "all" and with edge weights
# switched off (weights=NA).
closeness(net, mode="all", weights=NA)
## s01 s02 s03 s04 s05 s06 s07
## 0.03333333 0.03030303 0.04166667 0.03846154 0.03225806 0.03125000 0.03030303
## s08 s09 s10 s11 s12 s13 s14
## 0.02857143 0.02564103 0.02941176 0.03225806 0.03571429 0.02702703 0.02941176
## s15 s16 s17
## 0.03030303 0.02222222 0.02857143
# centr_clo(net, mode="all", normalized=T)
Eigenvector 特征向量 中心性
用转移矩阵描述网络, 节点的重要性就是节点被访问的概率. 特征向量就是重要性
# Eigenvector centrality on the directed network with weights switched off.
# The result is a list: $vector holds the per-vertex scores, $value the
# eigenvalue and $options the solver settings.
# TRUE is spelled out because T is a reassignable variable.
eigen_centrality(net, directed = TRUE, weights = NA)
## $vector
## s01 s02 s03 s04 s05 s06 s07 s08
## 0.6638179 0.3314674 1.0000000 0.9133129 0.3326443 0.7468249 0.1244195 0.3740317
## s09 s10 s11 s12 s13 s14 s15 s16
## 0.3453324 0.5991652 0.7334202 0.7519086 0.3470857 0.2915055 0.3314674 0.2484270
## s17
## 0.7503292
##
## $value
## [1] 3.006215
##
## $options
## $options$bmat
## [1] "I"
##
## $options$n
## [1] 17
##
## $options$which
## [1] "LR"
##
## $options$nev
## [1] 1
##
## $options$tol
## [1] 0
##
## $options$ncv
## [1] 0
##
## $options$ldv
## [1] 0
##
## $options$ishift
## [1] 1
##
## $options$maxiter
## [1] 1000
##
## $options$nb
## [1] 1
##
## $options$mode
## [1] 1
##
## $options$start
## [1] 1
##
## $options$sigma
## [1] 0
##
## $options$sigmai
## [1] 0
##
## $options$info
## [1] 0
##
## $options$iter
## [1] 7
##
## $options$nconv
## [1] 1
##
## $options$numop
## [1] 31
##
## $options$numopb
## [1] 0
##
## $options$numreo
## [1] 18
# centr_eigen(net, directed=T, normalized=T)
Betweenness
# Vertex betweenness on the directed network with weights switched off.
# TRUE is spelled out because T is a reassignable variable.
betweenness(net, directed = TRUE, weights = NA)
## s01 s02 s03 s04 s05 s06
## 24.0000000 5.8333333 127.0000000 93.5000000 16.5000000 20.3333333
## s07 s08 s09 s10 s11 s12
## 1.8333333 19.5000000 0.8333333 15.0000000 0.0000000 33.5000000
## s13 s14 s15 s16 s17
## 20.0000000 4.0000000 5.6666667 0.0000000 58.5000000
# Edge betweenness (one value per edge) with weights switched off.
# TRUE is spelled out because T is a reassignable variable.
edge_betweenness(net, directed = TRUE, weights = NA)
## [1] 10.833333 11.333333 8.333333 9.500000 4.000000 12.500000 3.000000
## [8] 2.333333 24.000000 16.000000 31.500000 32.500000 9.500000 6.500000
## [15] 23.000000 65.333333 11.000000 6.500000 18.000000 8.666667 5.333333
## [22] 10.000000 6.000000 11.166667 15.000000 21.333333 10.000000 2.000000
## [29] 1.333333 4.500000 11.833333 16.833333 6.833333 16.833333 31.000000
## [36] 17.000000 18.000000 14.500000 7.500000 28.500000 3.000000 17.000000
## [43] 5.666667 9.666667 6.333333 1.000000 15.000000 74.500000
# centr_betw(net, directed=T, normalized=T)
网络中每对节点之间最短距离的平均值(对于有向图来说是双向的)。
# Mean shortest-path length with edge direction ignored.
# NOTE(review): no `weights` argument is passed, and the recorded value (6.95)
# exceeds the unweighted diameter of 4, so edge weights appear to be used as
# lengths here -- pass weights = NA if a step count is intended.
# FALSE is spelled out because F is a reassignable variable.
mean_distance(net, directed = FALSE)
## [1] 6.948529
我们还可以在图中找到所有最短路径的长度:
distances(net) # with edge weights
## s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
## s01 0 4 2 6 1 5 3 4 3 4 3 3 9 4 7 26 8
## s02 4 0 4 8 3 7 5 6 1 5 5 5 11 6 9 28 10
## s03 2 4 0 4 1 3 1 2 3 2 1 1 7 2 5 24 6
## s04 6 8 4 0 5 1 5 6 7 6 5 3 3 6 1 22 2
## s05 1 3 1 5 0 4 2 3 2 3 2 2 8 3 6 25 7
## s06 5 7 3 1 4 0 4 5 6 5 4 2 4 5 2 21 3
## s07 3 5 1 5 2 4 0 3 4 3 2 2 8 3 6 25 7
## s08 4 6 2 6 3 5 3 0 5 4 3 3 9 4 7 26 8
## s09 3 1 3 7 2 6 4 5 0 5 4 4 10 5 8 27 9
## s10 4 5 2 6 3 5 3 4 5 0 3 3 9 4 7 26 8
## s11 3 5 1 5 2 4 2 3 4 3 0 2 8 1 6 25 7
## s12 3 5 1 3 2 2 2 3 4 3 2 0 6 3 4 23 5
## s13 9 11 7 3 8 4 8 9 10 9 8 6 0 9 4 22 1
## s14 4 6 2 6 3 5 3 4 5 4 1 3 9 0 7 26 8
## s15 7 9 5 1 6 2 6 7 8 7 6 4 4 7 0 23 3
## s16 26 28 24 22 25 21 25 26 27 26 25 23 22 26 23 0 21
## s17 8 10 6 2 7 3 7 8 9 8 7 5 1 8 3 21 0
distances(net, weights=NA) # ignore weights
## s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
## s01 0 1 1 1 1 2 2 2 2 2 2 2 3 3 1 3 2
## s02 1 0 1 2 1 3 2 2 1 1 2 2 3 3 2 4 3
## s03 1 1 0 1 1 2 1 1 2 1 1 1 2 2 2 3 2
## s04 1 2 1 0 2 1 2 2 3 2 1 1 2 2 1 2 1
## s05 1 1 1 2 0 2 2 2 1 2 2 2 3 3 1 3 3
## s06 2 3 2 1 2 0 3 3 3 3 2 1 2 2 1 1 1
## s07 2 2 1 2 2 3 0 1 2 1 2 2 2 1 3 4 3
## s08 2 2 1 2 2 3 1 0 1 2 2 2 3 2 3 4 3
## s09 2 1 2 3 1 3 2 1 0 1 3 3 4 3 2 4 4
## s10 2 1 1 2 2 3 1 2 1 0 2 2 3 2 3 4 3
## s11 2 2 1 1 2 2 2 2 3 2 0 2 2 1 2 3 2
## s12 2 2 1 1 2 1 2 2 3 2 2 0 1 1 2 2 2
## s13 3 3 2 2 3 2 2 3 4 3 2 1 0 1 3 2 1
## s14 3 3 2 2 3 2 1 2 3 2 1 1 1 0 3 3 2
## s15 1 2 2 1 1 1 3 3 2 3 2 2 3 3 0 2 2
## s16 3 4 3 2 3 1 4 4 4 4 3 2 2 3 2 0 1
## s17 2 3 2 1 3 1 3 3 4 3 2 2 1 2 2 1 0
我们还可以找到特定节点之间的最短路径,比如 MSNBC 和纽约邮报之间的路径:
# Shortest path from MSNBC to the New York Post; output = "both" returns the
# vertices ($vpath) and the edges ($epath) along the path.
msnbc <- V(net)[media == "MSNBC"]
ny_post <- V(net)[media == "New York Post"]
news.path <- shortest_paths(net, from = msnbc, to = ny_post, output = "both")

path_edges <- unlist(news.path$epath)
path_nodes <- unlist(news.path$vpath)

# Path edges: orange and wider; path vertices: gold; everything else grey.
ecol <- replace(rep("gray80", ecount(net)), path_edges, "orange")
ew <- replace(rep(2, ecount(net)), path_edges, 4)
vcol <- replace(rep("gray40", vcount(net)), path_nodes, "gold")

plot(
  net,
  vertex.color = vcol,
  edge.color = ecol,
  edge.width = ew,
  edge.arrow.mode = 0
)
对于单个节点, 使用 incident() 函数; 对于多个节点, 使用 incident_edges() 函数.
# Edges touching the Wall Street Journal vertex, in either direction.
wsj <- V(net)[media == "Wall Street Journal"]
inc.edges <- incident(net, wsj, mode = "all")

# Incident edges in orange, the WSJ vertex itself in gold, the rest grey.
ecol <- replace(rep("gray80", ecount(net)), inc.edges, "orange")
vcol <- replace(
  rep("grey40", vcount(net)),
  V(net)$media == "Wall Street Journal",
  "gold"
)
plot(net, vertex.color = vcol, edge.color = ecol)

# Vertices the WSJ points to (outgoing neighbours), recoloured in the same
# colour vector and drawn again.
neigh.nodes <- neighbors(net, wsj, mode = "out")
vcol[neigh.nodes] <- "#ff9d00"
plot(net, vertex.color = vcol)
E(network)[X %--% Y] : 选择顶点集 X 和 Y 之间的边,忽略方向
E(network)[X %->% Y] : 选择从顶点集 X 到顶点集 Y 的边
# Edges that start at a "Newspaper" vertex and end at an "Online" vertex.
newspapers <- V(net)[type.label == "Newspaper"]
online <- V(net)[type.label == "Online"]
E(net)[newspapers %->% online]
## + 7/48 edges from 6f62359 (vertex names):
## [1] s01->s15 s03->s12 s04->s12 s04->s17 s05->s15 s06->s16 s06->s17
Two vertices are cocited if there is another vertex citing both of them. cocitation() simply counts how many times two vertices are cocited. The bibliographic coupling of two vertices is the number of other vertices they both cite; bibcoupling() calculates this.
# Cocitation counts for every pair of vertices, returned as a square matrix.
cocitation(net)
## s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
## s01 0 1 1 2 1 1 0 1 2 2 1 1 0 0 1 0 0
## s02 1 0 1 1 0 0 0 0 1 0 0 0 0 0 2 0 0
## s03 1 1 0 1 0 1 1 1 2 2 1 1 0 1 1 0 1
## s04 2 1 1 0 1 1 0 1 0 1 1 1 0 0 1 0 0
## s05 1 0 0 1 0 0 0 1 0 1 1 1 0 0 0 0 0
## s06 1 0 1 1 0 0 0 0 0 0 1 1 1 1 0 0 2
## s07 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## s08 1 0 1 1 1 0 0 0 0 2 1 1 0 1 0 0 0
## s09 2 1 2 0 0 0 1 0 0 1 0 0 0 0 1 0 0
## s10 2 0 2 1 1 0 0 2 1 0 1 1 0 1 0 0 0
## s11 1 0 1 1 1 1 0 1 0 1 0 2 1 0 0 0 1
## s12 1 0 1 1 1 1 0 1 0 1 2 0 0 0 0 0 2
## s13 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0
## s14 0 0 1 0 0 1 0 1 0 1 0 0 1 0 0 0 0
## s15 1 2 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0
## s16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
## s17 0 0 1 0 0 2 0 0 0 0 1 2 0 0 0 1 0
在此之前, 创建无向链接
# Undirected copy of the network. edge.attr.comb controls how attributes are
# combined when edges are merged: weights are summed, every other attribute is
# dropped.
net.sym <- as.undirected(
  net,
  mode = "collapse",
  edge.attr.comb = list(weight = "sum", "ignore")
)
mode="collapse" 表示在任意存在(有向)链接的一对节点之间创建一条无向链接;mode="each" 为网络中的每条有向链接各创建一条无向链接(可能产生多重边);mode="mutual" 只为互相(双向)连接的节点对创建无向链接。
cliques(net.sym) # list of cliques
## [[1]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s03
##
## [[2]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s06
##
## [[3]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s14
##
## [[4]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s09
##
## [[5]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s04
##
## [[6]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s04 s06
##
## [[7]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s04
##
## [[8]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s05
##
## [[9]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s05 s09
##
## [[10]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s05
##
## [[11]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s13
##
## [[12]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s13 s14
##
## [[13]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s10
##
## [[14]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s09 s10
##
## [[15]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s10
##
## [[16]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s16
##
## [[17]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s06 s16
##
## [[18]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s08
##
## [[19]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s08 s09
##
## [[20]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s08
##
## [[21]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s01
##
## [[22]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s01 s05
##
## [[23]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s01 s03 s05
##
## [[24]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s01 s04
##
## [[25]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s01 s03 s04
##
## [[26]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s01 s03
##
## [[27]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s17
##
## [[28]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s16 s17
##
## [[29]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s06 s16 s17
##
## [[30]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s13 s17
##
## [[31]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s04 s17
##
## [[32]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s04 s06 s17
##
## [[33]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s06 s17
##
## [[34]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s12
##
## [[35]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s12 s13
##
## [[36]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s12 s13 s14
##
## [[37]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s04 s12
##
## [[38]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s04 s06 s12
##
## [[39]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s03 s04 s12
##
## [[40]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s12 s14
##
## [[41]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s06 s12
##
## [[42]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s12
##
## [[43]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s11
##
## [[44]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s04 s11
##
## [[45]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s03 s04 s11
##
## [[46]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s11 s14
##
## [[47]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s11
##
## [[48]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s07
##
## [[49]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s07 s08
##
## [[50]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s03 s07 s08
##
## [[51]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s07 s10
##
## [[52]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s03 s07 s10
##
## [[53]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s07 s14
##
## [[54]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s03 s07
##
## [[55]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s15
##
## [[56]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s01 s15
##
## [[57]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s01 s05 s15
##
## [[58]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s01 s04 s15
##
## [[59]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s05 s15
##
## [[60]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s04 s15
##
## [[61]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s04 s06 s15
##
## [[62]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s06 s15
##
## [[63]]
## + 1/17 vertex, named, from 9ce560a:
## [1] s02
##
## [[64]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s01 s02
##
## [[65]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s01 s02 s05
##
## [[66]]
## + 4/17 vertices, named, from 9ce560a:
## [1] s01 s02 s03 s05
##
## [[67]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s01 s02 s03
##
## [[68]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s02 s10
##
## [[69]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s02 s09 s10
##
## [[70]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s02 s03 s10
##
## [[71]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s02 s05
##
## [[72]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s02 s05 s09
##
## [[73]]
## + 3/17 vertices, named, from 9ce560a:
## [1] s02 s03 s05
##
## [[74]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s02 s09
##
## [[75]]
## + 2/17 vertices, named, from 9ce560a:
## [1] s02 s03
# Clique sizes. vapply() always returns an integer vector (also for an empty
# clique list), whereas the return type of sapply() depends on its input.
vapply(cliques(net.sym), length, integer(1)) # clique sizes
## [1] 1 1 1 1 1 2 2 1 2 2 1 2 1 2 2 1 2 1 2 2 1 2 3 2 3 2 1 2 3 2 2 3 2 1 2 3 2 3
## [39] 3 2 2 2 1 2 3 2 2 1 2 3 2 3 2 2 1 2 3 3 2 2 3 2 1 2 3 4 3 2 3 3 2 3 3 2 2
largest_cliques(net.sym) # cliques with max number of nodes
## [[1]]
## + 4/17 vertices, named, from 9ce560a:
## [1] s02 s03 s05 s01
# Colour the members of the largest clique(s) gold and everything else grey.
vcol <- rep("grey80", vcount(net.sym))
vcol[unlist(largest_cliques(net.sym))] <- "gold"
# net.sym is produced by as.undirected(), so it is plotted directly instead of
# being converted a second time.
plot(net.sym, vertex.label = V(net.sym)$name, vertex.color = vcol)
许多算法旨在检测由密集连接的节点组成的群,这些节点在群之间的连接较少。
Community detection based on edge betweenness (Newman-Girvan): 基于边介数(edge betweenness)的社区检测算法依次移除介数最高的边(每一步都重新计算介数), 并选择网络的最佳划分。
# Detect communities by edge betweenness. The warnings recorded below come
# from this call: membership is chosen by highest modularity, and with
# weighted edges modularity treats weights as similarities while edge
# betweenness treats them as distances.
ceb <- cluster_edge_betweenness(net)
## Warning in cluster_edge_betweenness(net): At
## core/community/edge_betweenness.c:493 : Membership vector will be selected based
## on the highest modularity score.
## Warning in cluster_edge_betweenness(net): At
## core/community/edge_betweenness.c:500 : Modularity calculation with weighted
## edge betweenness community detection might not make sense -- modularity treats
## edge weights as similarities while edge betwenness treats them as distances.
# Draw the community dendrogram, then the network with communities marked.
# plot_dendrogram() is the current name of the alias dendPlot().
plot_dendrogram(ceb, mode = "hclust")
plot(ceb, net)
查看具体结果
# Community id assigned to each vertex.
membership(ceb)
## s01 s02 s03 s04 s05 s06 s07 s08 s09 s10 s11 s12 s13 s14 s15 s16 s17
## 1 2 3 4 1 4 3 3 5 5 4 4 4 4 1 4 4