## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
# 建立一個空的圖
g<-graph.empty(directed = F)
# 增加node
g<-add.vertices(g, 3)
# 增加edge
g<-add.edges(g, c(c(1, 2,
1, 3,
2, 3,
2, 1,
3, 2,
1, 1)))
# 設定node的feature
V(g)$label <- c("a", "b", "c")
V(g)$color <- c('red', 'blue', 'orange')
V(g)$size <- c(20, 40, 60)
V(g)$member <- c(1, 2, 1)
# 篩選
V(g)[which(V(g)$member == 1)]## + 2/3 vertices, from bedfd20:
## [1] 1 3
## + 3/3 vertices, from bedfd20:
## [1] 1 2 3
# 設定edge的feature
E(g)$weight <- c(2, 8, 13, 4, 11, 3)
# 內建E(g)$weight。但是沒有label項,用set_vertex_attr加入edge label
g<-set_vertex_attr(g, "name", value = V(g)$label)
E(g)## + 6/6 edges from bedfd20 (vertex names):
## [1] a--b a--c b--c a--b b--c a--a
plot(g,
layout = layout.fruchterman.reingold,
edge.arrow.size = 0.4,
vertex.color = V(g)$color,
vertex.size = V(g)$size,
edge.width = E(g)$weight)g <- graph.data.frame(data.frame(
id1 = c('Bob', 'Mark', 'Red', 'Mat', 'White', 'White', 'Bob'),
di2 = c('Red', 'White', 'Mat', 'Blue', 'Bob', 'Mark', 'Mat')))
# 等同於
# g <- graph(edges = c('Bob' , 'Red',
# 'Mark' , 'White',
# 'Red' , 'Mat',
# 'Mat' , 'Blue',
# 'White', 'Bob',
# 'White', 'Mark',
# 'Bob' , 'Mat'), directed = TRUE)
plot(g,
layout = layout.fruchterman.reingold,
edge.arrow.size = 0.4,
vertex.color = 'lightblue',
vertex.size = 25)## 6 x 6 sparse Matrix of class "dgCMatrix"
## Bob Mark Red Mat White Blue
## Bob . . 1 1 . .
## Mark . . . . 1 .
## Red . . . 1 . .
## Mat . . . . . 1
## White 1 1 . . . .
## Blue . . . . . .
# 隨便建造的一個矩陣
adjm <- matrix(sample(0:1, 100, replace = TRUE, prob = c(0.9, 0.1)), nc = 10)
## 對矩陣行列命名
rownames(adjm) <- sample(letters, nrow(adjm))
colnames(adjm) <- rownames(adjm)
# matrix轉成graph
g1 <- graph_from_adjacency_matrix(adjm, weighted = TRUE,
mode = "undirected", add.rownames = TRUE)
# weighted是否需要加入權重
# mode有directed, undirected, upper, lower, max, min, plus有這麼幾種
# add.rownames或add.colnames定義名稱
g1## IGRAPH 653e607 UNW- 10 15 --
## + attr: name (v/c), TRUE (v/c), weight (e/n)
## + edges from 653e607 (vertex names):
## [1] s--s s--m s--p s--q s--l t--f t--q t--e m--e r--l r--e f--y q--q q--l
## [15] y--e
plot(g1,
layout = layout.fruchterman.reingold,
edge.arrow.size = 0.4,
vertex.color = 'white',
vertex.size = 25)## IGRAPH dfe5e0e U--- 4 3 -- Full graph
## + attr: name (g/c), loops (g/l)
## + edges from dfe5e0e:
## [1] 1--4 2--4 3--4
g <- make_tree(40, children = 3, mode = "undirected")
# 建構模型
# 其他分群方法
# cluster_label_prop
# cluster_fast_greedy
# spinglass.community
ceb <- cluster_edge_betweenness(g); ceb ## IGRAPH clustering edge betweenness, groups: 7, mod: 0.68
## + groups:
## $`1`
## [1] 1 4 13 38 39 40
##
## $`2`
## [1] 2 6 7 17 18 19 20 21 22
##
## $`3`
## [1] 3 9 10 26 27 28 29 30 31
##
## $`4`
## + ... omitted several groups/vertices
library(networkD3)
data(MisLinks)
data(MisNodes)
forceNetwork(Links = MisLinks, Nodes = MisNodes, Source = "source",
Target = "target", Value = "value", NodeID = "name",
Group = "group", opacity = 0.4, zoom = TRUE)MyNodes = data.frame(name = c("Google", "Apple", "Amazon", "Youtube", "Paypal"),
group = c("A", "B", "C", "A", "C"),
size = c(100, 25, 9, 1, 4))
# 描述圈圈間的聯結,source必定大於target
MyLinks = data.frame(source = c(1, 2, 2, 3),
target = c(0, 0, 1, 0),
value = c(1, 2, 5, 10)) # value代表線的粗細
forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source",
Target = "target", Value = "value", NodeID = "name",
Group = "group", opacity = 1, zoom = TRUE,
Nodesize = "size", opacityNoHover = 1) # opacityNoHover 顯示Node ID學生互動與成績的關聯
dta <- read.table("student.txt", header = T, sep = ",", row.names = 1)
dta[is.na(dta)] <- 0
head(dta)# 將成績分成四個等級,成績越好顏色越紅
exam <- read.csv("examscore.csv", header = F) %>%
mutate(beh = cut(V2, breaks = quantile(V2, probs = c(0, .25, .5, .75, 1))),
col = as.character(factor(beh, labels = c(rev(heat.colors(4))))))g1 <- graph_from_adjacency_matrix(data.matrix(dta), mode = "undirected")
plot(g1,
layout = layout.fruchterman.reingold,
edge.arrow.size = 0.4,
vertex.color = exam$col,
vertex.label.color = "black",
vertex.size = degree(g1))## [1] 500 240
# Correlations來畫圖
big5Graph <- qgraph(cor(big5), minimum = 0.3, groups = big5groups,
legend = TRUE, borders = TRUE, title = "Big 5 correlations")因素分析程序:決定因素數目、因素抽取、轉軸、解釋
## Parallel analysis suggests that the number of factors = NA and the number of components = 15
# 15
# 決定因素數後,試作因素分析
# 因素抽取設定為主軸因子法,轉軸是最優轉軸法(斜交)
rst <- fa(big5, nfactor = 15, fm = 'pa', rotate = "promax")## Loading required namespace: GPArotation
將因素分析結果畫成圖
loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>%
mutate(subject = row.names(loadings)) %>%
gather(factor, loading, -subject)
Graph <- qgraph(t, minimum = 0.3, groups = big5groups,
legend = TRUE, borders = TRUE, title = "")t <- t %>%
mutate(grp = substr(subject,1,1),
filter = ifelse(loading >= 0.3, 1, 0))
library(lattice)
stripplot(loading ~ grp | factor, data = t,
groups = filter,
jitter = T, layout = c(3, 2))## Loading required namespace: GPArotation
# 太長了用圖表示
# print.psych(rst, cut = .3, sort = TRUE)
loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>%
mutate(subject = row.names(loadings)) %>%
gather(factor, loading, -subject)
Graph <- qgraph(t, minimum = 0.3, groups = big5groups,
legend = TRUE, borders = TRUE, title = "")t <- t %>%
mutate(grp = substr(subject,1,1),
filter = ifelse(loading >= 0.3, 1, 0))
stripplot(loading ~ grp | factor, data = t,
groups = filter,
jitter = T, layout = c(3, 2))group <- list()
group$S <- seq(1, 12, 1)
group$R <- seq(13, 30, 1)
group$W <- seq(31, 36, 1)
# Correlations來畫圖
Graph <- qgraph(cor(dta), minimum = 0.4, groups = group,
legend = TRUE, borders = TRUE, title = "")## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## Parallel analysis suggests that the number of factors = NA and the number of components = 4
# 4
names(dta) <- c('說心情', '講話困難', '不會表現', '與人相處', '認識交談', '保持聯繫', '重要成員', '被看重的', '有貢獻的', '喜歡我', '重視意見', '喜歡表現', '全力以赴', '不放棄', '解決問題', '達成目標', '找尋資源', '承擔後果', '問題解決', '鼓勵自己', '保持平靜', '往好處想', '理想冒險', '相信幫我', '友人關心', '別人快樂', '常給建議', '感謝支持', '學習技能', '感激別人', '充滿喜悅', '神采飛揚', '快樂的人', '生活沈穩', '滿意生活', '生活豐盛')
rst <- fa(dta, nfactor = 4, fm = 'pa', rotate = "promax")## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was
## done
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was
## done
## Factor Analysis using method = pa
## Call: fa(r = dta, nfactors = 4, rotate = "promax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## item PA1 PA2 PA3 PA4 h2 u2 com
## 學習技能 29 0.89 0.68 0.32 1.1
## 感謝支持 28 0.82 0.58 0.42 1.1
## 解決問題 15 0.82 0.57 0.43 1.2
## 不放棄 14 0.76 0.56 0.44 1.1
## 感激別人 30 0.75 0.55 0.45 1.0
## 達成目標 16 0.69 0.59 0.41 1.1
## 承擔後果 18 0.69 0.33 0.67 1.1
## 理想冒險 23 0.66 0.33 0.56 0.44 1.7
## 問題解決 19 0.65 0.38 0.62 1.1
## 全力以赴 13 0.63 0.63 0.37 1.3
## 找尋資源 17 0.60 0.56 0.44 1.1
## 鼓勵自己 20 0.55 0.46 0.54 1.3
## 保持平靜 21 0.49 0.30 0.70 1.3
## 別人快樂 26 0.47 0.55 0.45 1.5
## 常給建議 27 0.46 0.35 0.48 0.52 2.2
## 往好處想 22 0.44 0.29 0.71 1.8
## 友人關心 25 0.35 0.33 0.47 0.53 3.2
## 滿意生活 35 0.99 0.82 0.18 1.0
## 生活豐盛 36 0.99 0.82 0.18 1.0
## 生活沈穩 34 0.91 0.73 0.27 1.0
## 神采飛揚 32 0.77 0.72 0.28 1.1
## 快樂的人 33 0.73 0.59 0.41 1.1
## 充滿喜悅 31 0.69 0.38 0.80 0.20 1.7
## 喜歡我 10 0.97 0.80 0.20 1.1
## 被看重的 8 0.83 0.66 0.34 1.1
## 重視意見 11 0.81 0.66 0.34 1.1
## 重要成員 7 0.78 0.64 0.36 1.4
## 有貢獻的 9 0.67 0.61 0.39 1.2
## 相信幫我 24 0.48 0.57 0.43 1.7
## 喜歡表現 12 0.45 0.49 0.51 1.5
## 不會表現 3 0.81 0.70 0.30 1.0
## 認識交談 5 0.70 0.51 0.49 1.0
## 講話困難 2 0.67 0.40 0.60 1.1
## 說心情 1 0.59 0.40 0.60 1.1
## 與人相處 4 0.57 0.56 0.44 1.2
## 保持聯繫 6 0.49 0.33 0.67 1.4
##
## PA1 PA2 PA3 PA4
## SS loadings 7.56 4.57 4.55 3.63
## Proportion Var 0.21 0.13 0.13 0.10
## Cumulative Var 0.21 0.34 0.46 0.56
## Proportion Explained 0.37 0.23 0.22 0.18
## Cumulative Proportion 0.37 0.60 0.82 1.00
##
## With factor correlations of
## PA1 PA2 PA3 PA4
## PA1 1.00 0.55 0.61 0.55
## PA2 0.55 1.00 0.47 0.48
## PA3 0.61 0.47 1.00 0.54
## PA4 0.55 0.48 0.54 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 4 factors are sufficient.
##
## The degrees of freedom for the null model are 630 and the objective function was 49.91 with Chi Square of 6795.73
## The degrees of freedom for the model are 492 and the objective function was 27.3
##
## The root mean square of the residuals (RMSR) is 0.05
## The df corrected root mean square of the residuals is 0.05
##
## The harmonic number of observations is 150 with the empirical chi square 423.05 with prob < 0.99
## The total number of observations was 150 with Likelihood Chi Square = 3644.11 with prob < 0
##
## Tucker Lewis Index of factoring reliability = 0.331
## RMSEA index = 0.207 and the 90 % confidence intervals are 0.201 0.214
## BIC = 1178.87
## Fit based upon off diagonal values = 0.99
## Measures of factor score adequacy
## PA1 PA2 PA3 PA4
## Correlation of (regression) scores with factors 0.97 0.98 0.97 0.95
## Multiple R square of scores with factors 0.95 0.95 0.93 0.90
## Minimum correlation of possible factor scores 0.89 0.91 0.87 0.79
loadings = as.data.frame(unclass(rst$loadings))
t <- loadings %>%
rename("復原力" = 1, "滿意感" = 2, "自尊一" = 3, "自尊二" = 4) %>%
mutate(s = row.names(loadings)) %>%
gather(value, target, -s)
par(family = "黑體-繁 中黑")
Graph <- qgraph(t, minimum = 0.2, groups = group,
legend = TRUE, borders = TRUE, title = "",
vertex.label.family = "黑體-繁 中黑")## Warning in qgraph(t, minimum = 0.2, groups = group, legend = TRUE, borders
## = TRUE, : The following arguments are not documented and likely not
## arguments of qgraph and thus ignored: vertex.label.family
# 所有商品
all.items = c(as.character(dat[, 1]),
as.character(dat[, 2]),
as.character(dat[, 3]),
as.character(dat[, 4]),
as.character(dat[, 5]),
as.character(dat[, 6]))
# 所有種類
levels.all = levels(as.factor(all.items))
# 去掉 ""
levels.all = levels.all[-1]
# 建立架構
MyNodes = data.frame(matrix(NA, ncol = 3, nrow = length(levels.all)))
# 命名
colnames(MyNodes) = c("name", "group", "size")
MyNodes[, 1] = levels.all
# output的商品與類別
out.items = as.character(dat[, 6])
levels.out = levels(dat[, 6])
# 依照分類填入group
MyNodes[levels.all %in% levels.out, 2] = "Output"
MyNodes[!levels.all %in% levels.out, 2] = "Input"
# size都設100
MyNodes[, 3] = c(rep(5, 5), rep(10, 10), rep(100, 5))
head(MyNodes)# 將商品數字化
for (i in 1:6) {
dat[, i] = as.integer(factor(dat[, i], levels = levels.all)) - 1
}
# 逐行做成 大的node|小的node|lift 格式
Links = NULL
for (i in 1:nrow(dat)) {
n.rule = sum(!is.na(dat[i, 1:5]))
new.rule = matrix(NA, ncol = 3, nrow = n.rule)
for (k in 1:n.rule) {
item1 = dat[i, k]
item2 = dat[i, 6]
new.rule[k, 1] = max(item1, item2)
new.rule[k, 2] = min(item1, item2)
new.rule[k, 3] = dat[i, 9]
}
Links = rbind(Links, new.rule)
}
#將重複的相加在一起
dup.MyLinks = Links[duplicated(Links[, 1:2]), ]
nondup.MyLinks = Links[!duplicated(Links[, 1:2]), ]
for (i in 1:nrow(dup.MyLinks)) {
dup.pos = which(nondup.MyLinks[, 1] == dup.MyLinks[i, 1] &
nondup.MyLinks[, 2] == dup.MyLinks[i, 2])
nondup.MyLinks[dup.pos, 3] = nondup.MyLinks[dup.pos, 3] + dup.MyLinks[i, 3]
}
#重新格式化
colnames(nondup.MyLinks) = c("source", "target", "value")
MyLinks = data.frame(nondup.MyLinks)
head(MyLinks)forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source",
Target = "target", Value = "value", NodeID = "name",
Group = "group", opacity = 1, zoom = TRUE, Nodesize = "size",
# colourScale = JS("d3.scale.category10()"),
legend = TRUE, fontSize = 20, bounded = FALSE,
charge = -120, opacityNoHover = 0.6, linkDistance = 150)library(corrplot)
library(RColorBrewer)
cormatrix.nba = cor(nba)
corrplot(cormatrix.nba, type = "upper", order = "hclust", tl.col = "black", tl.srt = 45)cormatrix.nba[cormatrix.nba < 0.4] = 0
cormatrix.nba[cormatrix.nba == 1] = 0
g <- graph_from_adjacency_matrix(cormatrix.nba, weighted = TRUE, add.rownames = TRUE)
plot(g,
layout = layout.fruchterman.reingold,
edge.arrow.size = 0.2,
vertex.color = 'lightgray', vertex.frame.color = NA,
vertex.size = 15, vertex.label.cex = 0.8)MyNodes <- data.frame(name = rownames(cormatrix.nba),
group = c(1, 2, 2, 2, 2, 1, 2, 2, 3, 3, 3, 3, 1, 1, 1, 2, 2, 1, 2, 1))
cormatrix.nba.t <- cormatrix.nba
rownames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1
colnames(cormatrix.nba.t) <- as.integer(as.factor(rownames(cormatrix.nba))) - 1
library(reshape2)
MyLinks <- melt(cormatrix.nba.t)
colnames(MyLinks) = c("source", "target", "value")
MyLinks <- MyLinks[MyLinks$value != 0, ] # 無關
MyLinks <- MyLinks[MyLinks$value != 1, ] # 自己
forceNetwork(Links = MyLinks, Nodes = MyNodes, Source = "source",
Target = "target", Value = "value", NodeID = "name",
Group = "group", opacity = 1, zoom = TRUE,
# colourScale = JS("d3.scale.category10()"),
legend = TRUE, fontSize = 20, bounded = FALSE,
charge = -120, opacityNoHover = 0.6, linkDistance = 150)dta <- read.table("higgs-social_network.edgelist", header = F)
dta <- dta-1
tmp1 <- dta[dta$V1<50, ]
tmp2 <- dta[dta$V2<50, ]
dta <- merge(tmp1, tmp2)
head(dta)g <- graph.data.frame(dta, directed = TRUE)
plot(g, layout = layout_with_gem,
vertex.size = degree(g), vertex.label = NA, edge.arrow.size = 0.1, )## Warning: Ignoring unknown aesthetics: label
if(!require("devtools")) install.packages("devtools")
library("devtools")
install_github("pablobarbera/scholarnetwork")## Skipping install of 'scholarnetwork' from a github remote, the SHA1 (11f37da8) has not changed since last install.
## Use `force = TRUE` to force installation
extractNetwork wraps the get_publications function from the scholar package, which extracts the list of publications on a Google Scholar profile, cleans it, and then parses the results into a format that is more suitable for network analysis: a data frame of weighted edges, where each edge is a collaboration in a publication, and the weight is one divided by number of co-authors; and a data frame with node-level information, which includes the group resulting from running a walktrap community detection algorithm.
## List of 2
## $ nodes:'data.frame': 129 obs. of 3 variables:
## ..$ label : chr [1:129] "A Heathcote" "A Martinez" "A Takahashi" "A Tyler" ...
## ..$ degree: num [1:129] 0.5 0.833 0.8 0.75 0.857 ...
## ..$ group : num [1:129] 19 14 13 1 18 2 8 2 2 14 ...
## $ edges:'data.frame': 482 obs. of 3 variables:
## ..$ node1 : chr [1:482] "C Sheu" "N Todd" "S Mcmahon" "M Ohnishi" ...
## ..$ node2 : chr [1:482] "A Heathcote" "A Martinez" "A Martinez" "A Takahashi" ...
## ..$ weight: num [1:482] 0.5 0.167 0.167 0.2 0.2 ...
# cleaning network data
network <- graph_from_data_frame(d$edges, directed = FALSE)
set.seed(123)
l <- layout.fruchterman.reingold(network, niter = 1500) # layout
fc <- walktrap.community(network) # community detection
# node locations
nodes <- data.frame(l); names(nodes) <- c("x", "y")
nodes$cluster <- factor(fc$membership)
nodes$label <- fc$names
nodes$degree <- degree(network)
# edge locations
edgelist <- get.edgelist(network, names = FALSE)
edges <- data.frame(nodes[edgelist[, 1], c("x", "y")], nodes[edgelist[, 2], c("x", "y")])
names(edges) <- c("x1", "y1", "x2", "y2")
# and now visualizing it...
p <- ggplot(nodes, aes(x = x, y = y, color = cluster, label = label, size = degree))
pq <- p +
# nodes
geom_point(color = "grey20", aes(fill = cluster),
shape = 21, show.legend = FALSE, alpha = 1/2) +
geom_text(color = "black", aes(label = label, size = degree),
show.legend = FALSE, alpha = 2/3) +
# edges
geom_segment(
aes(x = x1, y = y1, xend = x2, yend = y2, label = NA),
data = edges, size = 0.25, color = "grey20", alpha = 1/5) +
## note that here I add a border to the points
scale_fill_discrete(labels = labels) +
scale_size_continuous(range = c(2, 8)) +
theme(
panel.background = element_rect(fill = "white"),
plot.background = element_rect(fill = "white"),
axis.line = element_blank(), axis.text = element_blank(),
axis.ticks = element_blank(),
axis.title = element_blank(), panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.background = element_rect(colour = F, fill = "black"),
legend.key = element_rect(fill = "black", colour = F),
legend.title = element_text(color = "white"),
legend.text = element_text(color = "white")
) +
## changing size of points in legend
guides(fill = guide_legend(override.aes = list(size = 5)))## Warning: Ignoring unknown aesthetics: label
it is difficult to make sure labels do not overlap. A probably better option is to export the network data to a format that Gephi can read, and then edit it manually in Gephi, as shown below.
df <- data.frame(Source = d$edges$node1, Target = d$edges$node2)
write.csv(df, file = "edgelist-gephi.csv", row.names = FALSE)