How to make RStudio display Chinese characters correctly

getwd()
## [1] "/Users/huanghuilin/Desktop/360安全云盘同步版/2022/2022/网络数据的统计分析:R语言实战"
library(showtext)
showtext_auto()

Exponential Random Graph Models

General Form

The basic form of an exponential random graph model is \[P_{\theta}(Y=y)=\frac{1}{C}\exp\{\sum_{H}\theta_{H}g_{H}(y)\}~~~(6.1)\] where each \(H\) is a configuration (a set of possible edges among a subset of vertices), \(g_{H}(y)=\prod_{y_{ij}\in H}y_{ij}\) equals 1 if the configuration occurs in \(y\) and 0 otherwise, a nonzero \(\theta_{H}\) means that the \(Y_{ij}\) in \(H\) are mutually dependent conditional on the rest of the graph, and \(C=C(\theta)\) is a normalizing constant. For example, for a single-edge configuration \(g_{H}(y)=y_{ij}\), while for a triangle configuration \(g_{H}(y)=y_{ij}y_{jk}y_{ik}\).

To convert the igraph object lazega to statnet-format data, first extract the adjacency matrix and the vertex attributes:

library(igraph)
library(sand)
data(lazega)
A<-as_adjacency_matrix(lazega) # adjacency matrix
v.attrs<-as_data_frame(lazega,what="vertices") # vertex attribute data frame
v.attrs
##     name Seniority Status Gender Office Years Age Practice School
## V1    V1         1      1      1      1    31  64        1      1
## V2    V2         2      1      1      1    32  62        2      1
## V3    V3         3      1      1      2    13  67        1      1
## V4    V4         4      1      1      1    31  59        2      3
## V5    V5         5      1      1      2    31  59        1      2
## V6    V6         6      1      1      2    29  55        1      1
## V7    V7         7      1      1      2    29  63        2      3
## V8    V8         8      1      1      1    28  53        1      3
## V9    V9         9      1      1      1    25  53        2      1
## V10  V10        10      1      1      1    25  53        2      3
## V11  V11        11      1      1      1    23  50        1      1
## V12  V12        12      1      1      1    24  52        2      2
## V13  V13        13      1      1      1    22  57        1      2
## V14  V14        14      1      1      2     1  56        2      1
## V15  V15        15      1      1      3    21  48        2      3
## V16  V16        16      1      1      1    20  46        2      1
## V17  V17        17      1      1      1    23  50        2      1
## V18  V18        18      1      1      2    18  45        1      2
## V19  V19        19      1      1      1    19  46        2      1
## V20  V20        20      1      1      1    19  49        1      1
## V21  V21        21      1      1      1    17  43        1      2
## V22  V22        22      1      1      1     9  49        1      3
## V23  V23        23      1      1      1    16  45        1      2
## V24  V24        24      1      1      1    15  44        1      2
## V25  V25        25      1      1      2    15  43        2      2
## V26  V26        26      1      1      1    15  41        1      3
## V27  V27        27      1      2      1    13  47        1      1
## V28  V28        28      1      1      2    11  38        2      2
## V29  V29        29      1      2      1    10  38        2      3
## V30  V30        30      1      1      2     7  39        1      3
## V31  V31        31      1      1      2     8  34        1      2
## V32  V32        32      1      1      2     8  33        1      3
## V33  V33        33      1      1      2     8  37        1      3
## V34  V34        34      1      2      1     8  36        2      2
## V35  V35        35      1      1      2     8  33        2      3
## V36  V36        36      1      1      1     5  43        1      3
names(v.attrs)
## [1] "name"      "Seniority" "Status"    "Gender"    "Office"    "Years"    
## [7] "Age"       "Practice"  "School"
plot(lazega,vertex.size=3,vertex.label=NA)

Create the corresponding network object for ergm:

library(ergm)
lazega.s<-network::as.network(as.matrix(A),directed=FALSE)
network::set.vertex.attribute(lazega.s,"Office",v.attrs$Office)
network::set.vertex.attribute(lazega.s,"Practice",v.attrs$Practice)
network::set.vertex.attribute(lazega.s,"Gender",v.attrs$Gender)
network::set.vertex.attribute(lazega.s,"Seniority",v.attrs$Seniority)
plot(lazega.s)

Model Specification

Bernoulli random graph model: whether an edge is present between a given pair of nodes is independent of the presence of edges between any other pair of nodes.

Equation (6.1) then simplifies to \[P_{\theta}(Y=y)=\frac{1}{C}\exp\{\sum_{i,j}\theta_{ij}y_{ij}\}~~~(6.3)\]

Assuming further that \(\theta_{ij}=\theta\) is constant, i.e. the network is homogeneous (homogeneity), (6.3) simplifies to \[P_{\theta}(Y=y)=\frac{1}{C}\exp\{\theta L(y)\}~~~(6.4)\] where \(L(y)=\sum_{i,j}y_{ij}=N_{e}\) is the number of edges in the graph. This is equivalent to a Bernoulli random graph model with edge probability \(p=\frac{\exp(\theta)}{1+\exp(\theta)}\).

For the network lazega.s, model (6.4) can be specified as follows:

my.ergm.bern<-formula(lazega.s~edges)
my.ergm.bern
## lazega.s ~ edges
# Value of the statistic L, i.e. the number of edges in the graph
summary(my.ergm.bern)
## edges 
##   115
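
As a quick check (a minimal sketch added here, not part of the original analysis), this one-parameter model can be fitted directly and its coefficient mapped back to the edge probability \(p=\exp(\theta)/(1+\exp(\theta))\), which should coincide with the observed density of lazega.s:

# Sketch: fit the Bernoulli ERGM and recover the edge probability p
bern.fit<-ergm(my.ergm.bern)
plogis(coef(bern.fit))                # exp(theta)/(1+exp(theta))
network::network.density(lazega.s)    # observed density, should agree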

Let \(S_{k}(y)\) and \(T(y)\) denote the number of k-stars and the number of triangles, respectively. The exponential random graph model can then be extended to \[P_{\theta}(Y=y)=\frac{1}{C}\exp\{\sum_{k=1}^{N_{v}-1}\theta_{k}S_{k}(y)+\theta_{\tau}T(y)\}~~~(6.5)\] To apply model (6.5) to the lazega.s network, set \(\theta_{4}=\dots=\theta_{N_{v}-1}=0\), so that only the star counts \(S_{k}\) for \(k=2\) and \(k=3\) are included:

my.ergm<-formula(lazega.s~edges+kstar(2)+kstar(3)+triangle)
my.ergm
## lazega.s ~ edges + kstar(2) + kstar(3) + triangle
# Number of edges, 2-stars, 3-stars, and triangles in the graph
summary(my.ergm)
##    edges   kstar2   kstar3 triangle 
##      115      926     2681      120
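
These counts can be cross-checked directly from the igraph object (a sketch added here): the number of k-stars equals \(\sum_{i}\binom{d_{i}}{k}\) over the vertex degrees \(d_{i}\), and count_triangles() counts each triangle three times, once per vertex:

# Sketch: recompute the same statistics from the igraph object
d<-igraph::degree(lazega)
sum(choose(d,2))                 # 2-stars
sum(choose(d,3))                 # 3-stars
sum(count_triangles(lazega))/3   # triangles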

Experience shows that models specified in this way typically fit real data quite poorly; analysis of this phenomenon shows that it is closely related to the problem of model degeneracy. Unfortunately, the alternative of including sufficiently many higher-order terms is, from a model-fitting perspective, equally problematic.

One way out of this dilemma is, for \(k\geq 2\) and some \(\lambda\geq 1\), to construct an "alternating k-star statistic" \[AKS_{\lambda}(y)=\sum_{k=2}^{N_{v}-1}(-1)^k\frac{S_{k}(y)}{\lambda^{k-2}}~~(6.6)\] Alternatively, provided the number of edges is also included in the model, one can define the geometrically weighted degree count
\[GWD_{\gamma}(y)=\sum_{d=1}^{N_{v}-1}e^{-\gamma d}N_{d}(y)~~(6.7)\]

where \(N_{d}(y)\) is the number of vertices of degree \(d\), and \(\gamma>0\) is related to \(\lambda\) through \(\gamma=\log\frac{\lambda}{\lambda-1}\).

A further alternative is a sum of k-triangle counts with alternating signs, giving the generalized triadic statistic \[AKT_{\lambda}(y)=3T_{1}+ \sum_{k=2}^{N_{v}-2}(-1)^{k+1}\frac{T_{k}(y)}{\lambda^{k-1}}~~(6.8)\] Here \(T_{k}\) is the number of k-triangles, where a k-triangle is defined as k individual triangles sharing a common base edge.

The three statistics above are available in the ergm package through the terms altkstar, gwdegree, and gwesp, respectively:

# Alternating k-star with lambda = 1 or 2
my.ergm.altkstar.1<-formula(lazega.s~edges+altkstar(lambda=1,fixed=TRUE))
summary(my.ergm.altkstar.1)
##      edges altkstar.1 
##        115        196
my.ergm.altkstar.2<-formula(lazega.s~edges+altkstar(lambda=2,fixed=TRUE))
summary(my.ergm.altkstar.2)
##      edges altkstar.2 
##   115.0000   335.3453
# geometrically weighted degree
my.ergm.gwdegree<-formula(lazega.s~edges+gwdegree)
summary(my.ergm.gwdegree)
##       edges  gwdegree#1  gwdegree#2  gwdegree#3  gwdegree#4  gwdegree#5 
##         115           3           2           4           2           4 
##  gwdegree#6  gwdegree#7  gwdegree#8  gwdegree#9 gwdegree#10 gwdegree#11 
##           4           1           1           5           1           1 
## gwdegree#12 gwdegree#13 gwdegree#14 gwdegree#15 gwdegree#16 gwdegree#17 
##           2           3           0           1           0           0 
## gwdegree#18 gwdegree#19 gwdegree#20 gwdegree#21 gwdegree#22 gwdegree#23 
##           0           0           0           0           0           0 
## gwdegree#24 gwdegree#25 gwdegree#26 gwdegree#27 gwdegree#28 gwdegree#29 
##           0           0           0           0           0           0 
## gwdegree#30 
##           0
# gwesp with lambda = 1
my.ergm.gwesp.1<-formula(lazega.s~edges+gwesp(1,fixed=TRUE))
summary(my.ergm.gwesp.1)
##         edges gwesp.fixed.1 
##      115.0000      213.1753
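
To make definition (6.7) concrete (a sketch added here, not from the original text), the geometrically weighted degree count can also be computed by hand from the degree distribution of lazega.s; note that the internal decay parameterization of ergm's gwdegree term may differ from this raw formula:

# Sketch: compute GWD_gamma(y) of (6.7) directly from the degree counts
lambda<-2
gamma<-log(lambda/(lambda-1))
deg<-rowSums(as.matrix(A))                # vertex degrees
Nd<-table(deg)                            # N_d(y): number of vertices of degree d
d.vals<-as.numeric(names(Nd))
sum(exp(-gamma*d.vals[d.vals>=1])*Nd[d.vals>=1])   # GWD_gamma(y)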

Finally, we consider exogenous effects of vertex attributes. Let \[g(y,x)=\sum_{1\leq i<j\leq N_{v}}y_{ij}h(x_{i},x_{j})~~~(6.9)\]

where \(h\) is a symmetric function of \(x_{i}\) and \(x_{j}\), and \(x_{i}\) (resp. \(x_{j}\)) is the observed attribute vector of vertex \(i\) (resp. \(j\)). If \(h\) measures some form of similarity between the attributes, then the statistic in (6.9) assesses the overall similarity between neighbours in the network. For a main effect, \(h\) takes the form \[h(x_{i},x_{j})=x_{i}+x_{j};\] for a second-order effect (also called a similarity or homophily effect), it takes the form \[h(x_{i},x_{j})=I\{x_{i}=x_{j}\}.\] Main effects and second-order effects are implemented in ergm by the terms nodemain (which appears as nodecov in the fitted output below) and nodematch, respectively.
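
To make (6.9) concrete (a sketch added here), both forms of \(h\) can be evaluated by hand from the adjacency matrix and an attribute vector; for Practice, the two sums should match the nodecov.Practice and nodematch.Practice statistics reported later:

# Sketch: hand computation of the attribute statistics in (6.9) for Practice
Am<-as.matrix(A)
x<-v.attrs$Practice
sum(Am*outer(x,x,"+"))/2    # main effect: sum of x_i + x_j over all edges
sum(Am*outer(x,x,"=="))/2   # homophily: number of edges whose endpoints share Practice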

Putting these pieces together, the final model is specified as \[P_{\theta,\beta}(Y=y|X=x)=\frac{1}{c(\theta,\beta)} \exp\{\theta_{1}S_{1}(y)+\theta_{2}AKT_{\lambda}(y)+\beta^Tg(y,x)\}~~(6.10)\] where \(g\) is a vector of five attribute statistics and \(\beta\) is the corresponding parameter vector.

Applying model (6.10) to the lazega.s network:

lazega.ergm<-formula(lazega.s~edges
                  +gwesp(log(3),fixed=TRUE)
                  +nodemain("Seniority")
                  +nodemain("Practice")
                  +match("Practice")
                  +match("Gender")
                  +match("Office")
                      )

Model Fitting

The parameters are estimated by Markov chain Monte Carlo maximum likelihood using the ergm() function in the ergm package:

set.seed(42)
lazega.ergm.fit<-ergm(lazega.ergm)
summary(lazega.ergm.fit)
## Call:
## ergm(formula = lazega.ergm)
## 
## Monte Carlo Maximum Likelihood Results:
## 
##                               Estimate Std. Error MCMC % z value Pr(>|z|)    
## edges                        -6.966710   0.723892      0  -9.624  < 1e-04 ***
## gwesp.fixed.1.09861228866811  0.592816   0.091828      0   6.456  < 1e-04 ***
## nodecov.Seniority             0.024416   0.006633      0   3.681 0.000232 ***
## nodecov.Practice              0.394350   0.109541      0   3.600 0.000318 ***
## nodematch.Practice            0.765364   0.201127      0   3.805 0.000142 ***
## nodematch.Gender              0.715513   0.254257      0   2.814 0.004891 ** 
## nodematch.Office              1.143581   0.198585      0   5.759  < 1e-04 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      Null Deviance: 873.4  on 630  degrees of freedom
##  Residual Deviance: 458.9  on 623  degrees of freedom
##  
## AIC: 472.9  BIC: 504.1  (Smaller is better. MC Std. Err. = 0.3403)
anova(lazega.ergm.fit)
## Analysis of Variance Table
## 
## Model 1: lazega.s ~ edges + gwesp(log(3), fixed = TRUE) + nodemain("Seniority") + 
##     nodemain("Practice") + match("Practice") + match("Gender") + 
##     match("Office")
##          Df Deviance Resid. Df Resid. Dev Pr(>|Chisq|)    
## NULL                       630     873.37                 
## Model 1:  7   414.43       623     458.94    < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
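
Because the likelihood is only approximated by MCMC, it is prudent to examine the behaviour of the sampler before interpreting the estimates; the ergm package provides mcmc.diagnostics() for this purpose (a brief sketch, output omitted):

# Sketch: trace plots and diagnostics for the MCMC sample underlying the MLE
mcmc.diagnostics(lazega.ergm.fit)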

Goodness of Fit

To assess how well the model in (6.10), as fitted by ergm, reproduces the data, the gof() function in the ergm package performs Monte Carlo simulation and compares the simulated distributions of degree, geodesic distance, and edgewise shared partners with those of the observed network.

gof.lazega.ergm<-gof(lazega.ergm.fit)
gof.lazega.ergm
## 
## Goodness-of-fit for degree 
## 
##          obs min mean max MC p-value
## degree0    2   0 2.35   9       1.00
## degree1    3   0 2.95   9       0.94
## degree2    2   0 2.90   8       0.94
## degree3    4   0 2.78   8       0.64
## degree4    2   0 2.78  11       0.94
## degree5    4   0 2.64   8       0.54
## degree6    4   0 2.63   8       0.52
## degree7    1   0 3.10   8       0.34
## degree8    1   0 2.78   7       0.44
## degree9    5   0 2.49   7       0.22
## degree10   1   0 2.38   7       0.70
## degree11   1   0 1.62   5       1.00
## degree12   2   0 1.54   5       0.88
## degree13   3   0 1.06   7       0.18
## degree14   0   0 0.76   4       0.92
## degree15   1   0 0.48   3       0.82
## degree16   0   0 0.35   2       1.00
## degree17   0   0 0.21   2       1.00
## degree18   0   0 0.11   2       1.00
## degree19   0   0 0.04   1       1.00
## degree20   0   0 0.02   1       1.00
## degree21   0   0 0.02   1       1.00
## degree23   0   0 0.01   1       1.00
## 
## Goodness-of-fit for edgewise shared partner 
## 
##       obs min  mean max MC p-value
## esp0    5   1  8.39  21       0.48
## esp1   16   3 14.95  33       0.80
## esp2   29   8 19.77  39       0.10
## esp3   17   6 21.66  44       0.58
## esp4   23   0 19.17  41       0.66
## esp5   11   0 13.96  30       0.76
## esp6   10   0  8.24  22       0.78
## esp7    4   0  4.23  15       1.00
## esp8    0   0  2.28  12       0.64
## esp9    0   0  0.93   7       1.00
## esp10   0   0  0.39   4       1.00
## esp11   0   0  0.10   1       1.00
## esp12   0   0  0.03   1       1.00
## esp13   0   0  0.01   1       1.00
## 
## Goodness-of-fit for minimum geodesic distance 
## 
##     obs min   mean max MC p-value
## 1   115  59 114.11 154       0.96
## 2   275 113 261.98 356       0.88
## 3   148  57 133.43 197       0.58
## 4    21   0  26.91  91       0.94
## 5     2   0   4.27  46       0.84
## 6     0   0   0.62  19       1.00
## 7     0   0   0.11   6       1.00
## 8     0   0   0.03   2       1.00
## Inf  69   0  88.54 302       1.00
## 
## Goodness-of-fit for model statistics 
## 
##                                    obs        min      mean      max MC p-value
## edges                         115.0000   59.00000  114.1100  154.000       0.96
## gwesp.fixed.1.09861228866811  222.9108   74.39506  221.4946  342.278       1.00
## nodecov.Seniority            4687.0000 2141.00000 4641.2000 6514.000       0.98
## nodecov.Practice              359.0000  190.00000  358.0900  472.000       0.96
## nodematch.Practice             72.0000   44.00000   71.3200   98.000       0.94
## nodematch.Gender               99.0000   51.00000   97.9800  134.000       0.98
## nodematch.Office               85.0000   48.00000   84.2100  112.000       1.00
par(mfrow=c(2,2))
plot(gof.lazega.ergm, main="Goodness-of-fit Diagnostics")

The results indicate that, with respect to these characteristics, the model fits the data well overall.
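
A complementary check (a sketch added here, using the standard simulate() method for ergm fits) is to simulate networks from the fitted model and compare a few summary statistics with their observed values:

# Sketch: compare statistics of simulated networks with the observed ones
sim.nets<-simulate(lazega.ergm.fit,nsim=100)
sim.stats<-sapply(sim.nets,function(g) summary(g~edges+triangle))
summary(lazega.s~edges+triangle)                      # observed values
apply(sim.stats,1,quantile,probs=c(0.025,0.5,0.975))  # simulated ranges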

Network Block Models

Model Specification

Suppose that each vertex \(i\in V\) of a graph \(G=(V,E)\) belongs to exactly one of \(Q\) classes \({\cal{S}}_{1},\dots,{\cal{S}}_{Q}\), and write \(q=q(i)\) for the class label of vertex \(i\). A block model for \(G\) specifies that, conditional on the class labels \(q\) and \(r\) of vertices \(i\) and \(j\), each entry \(Y_{ij}\) of the adjacency matrix \(Y\) is an independent Bernoulli random variable with probability \(\pi_{qr}\). For an undirected graph, \(\pi_{qr}=\pi_{rq}\).

The block model is a variant of the Bernoulli random graph model in which the probability of an edge is restricted to one of \(Q^2\) possible values \(\pi_{qr}\). It can be written as \[P_{\theta}(Y=y)=\frac{1}{C}\exp\{\sum_{q,r}\theta_{qr}L_{qr}(y)\}.\] where \(L_{qr}(y)\) is the number of edges in the observed graph \(y\) connecting pairs of vertices of classes \(q\) and \(r\).

In practice, the stochastic block model is more common. Writing \(Z_{iq}=1\) if vertex \(i\) belongs to class \(q\) and 0 otherwise, the stochastic block model generates the vectors \(Z_{i}=(Z_{i1},\dots,Z_{iQ})\) independently, with

\[P(Z_{iq}=1)=\alpha_{q},~~\sum_{q}\alpha_{q}=1.\]
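
To illustrate this generative mechanism (a toy sketch with made-up parameters, not part of the original text), a small stochastic block model can be simulated with igraph's sample_sbm(): class sizes are drawn from a multinomial with probabilities \(\alpha_{q}\), and edges are then placed independently with the class-pair probabilities \(\pi_{qr}\):

# Toy sketch: simulate a 3-class SBM (parameter values are purely illustrative)
alpha.toy<-c(0.5,0.3,0.2)                    # class probabilities alpha_q
Pi.toy<-matrix(0.02,3,3); diag(Pi.toy)<-0.3  # edge probabilities pi_qr
n.toy<-60
sizes.toy<-as.vector(rmultinom(1,n.toy,alpha.toy))
g.toy<-sample_sbm(n.toy,pref.matrix=Pi.toy,block.sizes=sizes.toy,directed=FALSE)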

Model Fitting

For the (non-stochastic) block model, only the edge probability parameters \(\pi_{qr}\) need to be estimated, and the maximum likelihood estimates are simply the corresponding empirical frequencies.
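
A sketch of that computation (assuming a symmetric 0/1 adjacency matrix A.obs and a vector z of known integer class labels; both names are hypothetical):

# Sketch: empirical MLE of pi_qr when the class labels are known
empirical.pi<-function(A.obs,z){
  Q<-max(z)
  n.q<-tabulate(z,Q)
  pairs.qr<-outer(n.q,n.q)          # possible ordered pairs of vertices between classes
  diag(pairs.qr)<-n.q*(n.q-1)       # within a class, exclude self-pairs
  e.qr<-matrix(0,Q,Q)
  for(q in 1:Q) for(r in 1:Q)
    e.qr[q,r]<-sum(A.obs[z==q,z==r,drop=FALSE])
  e.qr/pairs.qr                     # empirical edge frequencies
}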

For the stochastic block model, the conditional edge probabilities \(\pi_{qr}\) and the class probabilities \(\alpha_{q}\) must be estimated jointly. We can fit a stochastic block model to the French political blog network fblog using the BM_bernoulli function in the blockmodels package:

library(blockmodels)
set.seed(42)
A.fblog<-as.matrix(as_adjacency_matrix(fblog))
plot(fblog,vertex.size=5,vertex.label=NA)

fblog.sbm <- BM_bernoulli("SBM_sym", A.fblog,
                 verbosity=0, plotting='')
fblog.sbm
## blockmodels object
##     model: bernoulli 
##     membership: SBM_sym 
##     network: 192 x 192 scalar network 
##     Estimation not done.
##     Run $estimate(). You can specify a reinitialization effort, by default 1.
fblog.sbm$estimate()
ICLs <- fblog.sbm$ICL # integrated classification likelihood (ICL)
ICLs
##  [1] -5028.312 -4580.705 -4318.101 -4118.934 -3986.373 -3865.908 -3810.839
##  [8] -3768.287 -3742.687 -3720.026 -3725.161 -3752.341 -3783.983 -3833.919
## [15] -3886.516
Q <- which.max(ICLs)
Q
## [1] 10
Z <- fblog.sbm$memberships[[Q]]$Z  # posterior class-membership probabilities for each node


cl.labs <- apply(Z,1,which.max) # most probable class for each node

nv <- vcount(fblog)
Z[cbind(1:nv,cl.labs)]
##   [1] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##   [8] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [15] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [22] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.8585590
##  [29] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [36] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [43] 0.9953320 0.9953320 0.9953320 0.9851703 0.9953320 0.9953320 0.9953320
##  [50] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [57] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [64] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [71] 0.9953320 0.9953320 0.9953320 0.9953320 0.9952600 0.9953320 0.9953320
##  [78] 0.9953320 0.9953320 0.9953320 0.9953320 0.9928643 0.9953320 0.9940801
##  [85] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [92] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
##  [99] 0.9953320 0.9953320 0.9953320 0.9953320 0.8722307 0.9953320 0.9953320
## [106] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [113] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [120] 0.9953320 0.9856524 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [127] 0.9953320 0.9953320 0.9953320 0.9834812 0.9953320 0.9953320 0.9953320
## [134] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [141] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [148] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [155] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [162] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [169] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [176] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [183] 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320 0.9953320
## [190] 0.9953320 0.9953320 0.9953320
summary(Z[cbind(1:nv,cl.labs)])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.8586  0.9953  0.9953  0.9938  0.9953  0.9953
cl.cnts <- as.vector(table(cl.labs))  # number of nodes in each class
cl.cnts 
##  [1] 35 27 11 21 24 25  6  7 13 23
alpha <- cl.cnts/nv  # estimated class proportions alpha_q
alpha
##  [1] 0.18229167 0.14062500 0.05729167 0.10937500 0.12500000 0.13020833
##  [7] 0.03125000 0.03645833 0.06770833 0.11979167
Pi.mat <- fblog.sbm$model_parameters[[Q]]$pi # conditional edge probabilities pi between classes
Pi.mat 
##              [,1]         [,2]         [,3]         [,4]         [,5]
##  [1,] 0.083611401 0.0026475735 0.0030340287 0.0153540703 0.0040491832
##  [2,] 0.002647574 0.2124783425 0.0073657690 0.0005307515 0.0020409052
##  [3,] 0.003034029 0.0073657690 0.9102251927 0.0009221811 0.0009170384
##  [4,] 0.015354070 0.0005307515 0.0009221811 0.3491010126 0.0006552089
##  [5,] 0.004049183 0.0020409052 0.0009170384 0.0006552089 0.5430460926
##  [6,] 0.005804370 0.0048290270 0.0364593875 0.0024459962 0.0087663557
##  [7,] 0.105118625 0.0138547459 0.0177621832 0.4293284245 0.0293873144
##  [8,] 0.061680023 0.6147987219 0.0024976022 0.0687191280 0.0076806603
##  [9,] 0.086843749 0.0343015470 0.0431732528 0.0810833089 0.0934576716
## [10,] 0.031302358 0.0006517433 0.0012495852 0.0910635932 0.0043791190
##              [,6]       [,7]        [,8]       [,9]        [,10]
##  [1,] 0.005804370 0.10511862 0.061680023 0.08684375 0.0313023577
##  [2,] 0.004829027 0.01385475 0.614798722 0.03430155 0.0006517433
##  [3,] 0.036459388 0.01776218 0.002497602 0.04317325 0.0012495852
##  [4,] 0.002445996 0.42932842 0.068719128 0.08108331 0.0910635932
##  [5,] 0.008766356 0.02938731 0.007680660 0.09345767 0.0043791190
##  [6,] 0.341171854 0.04089890 0.085362959 0.10830472 0.0041423382
##  [7,] 0.040898897 0.90617331 0.191006641 0.37978670 0.7764194397
##  [8,] 0.085362959 0.19100664 0.973769651 0.38171930 0.0633538096
##  [9,] 0.108304718 0.37978670 0.381719300 0.38270058 0.0714640995
## [10,] 0.004142338 0.77641944 0.063353810 0.07146410 0.5454915285
rowSums(Pi.mat)
##  [1] 0.3994454 0.8934991 1.0236062 1.0392037 0.6943795 0.6381859 2.8897363
##  [8] 2.4505885 1.6628349 1.5895176
colSums(Pi.mat)
##  [1] 0.3994454 0.8934991 1.0236062 1.0392037 0.6943795 0.6381859 2.8897363
##  [8] 2.4505885 1.6628349 1.5895176

Goodness of Fit

# Simulate networks from the fitted SBM and compare their degree distribution with the observed one
ntrials <- 1000
Pi.mat <- (t(Pi.mat)+Pi.mat)/2      # symmetrize the estimated probability matrix
deg.summ <- vector("list",ntrials)
for(i in (1:ntrials)){
  blk.sz <- rmultinom(1,nv,alpha)   # draw class sizes from the estimated alpha
  g.sbm <- sample_sbm(nv,pref.matrix=Pi.mat,
                      block.sizes=blk.sz,directed=FALSE)
  deg.summ[[i]] <- summary(degree(g.sbm))
}

Reduce('+',deg.summ)/ntrials  # average degree summary over the simulated networks
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.951   9.137  13.100  15.157  18.849  49.612
summary(degree(fblog))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00    8.00   13.00   14.91   18.00   56.00
plot(fblog.sbm$ICL,xlab="Q",ylab="ICL",type="b",
      main="Integrated classification likelihood (ICL)")
lines(c(Q,Q),c(min(ICLs),max(ICLs)),col="red",lty=2)

edges <- as_edgelist(fblog,names=FALSE)
cl.labs
##   [1]  1  1  1  1  1  1  1  3  3  6  6  6  9  6  6  6  6  6  2  6  6  6  3  6  3
##  [26]  6  6  1  6  3  3  3  6  6  6  6  3  3  6  3  3  6  6  6  6  9  6  2  8  2
##  [51]  8  2  2  2  2  2  2  8  8  9  2  2  2  8  2  2  2  2  2  2  2  2  2  2  8
##  [76]  2  2  2  2 10  1  4 10 10  4  7  4  4 10 10  1  4  4  4  4  7 10 10 10 10
## [101] 10 10  9 10  4  4  4  7  7  4  7 10 10 10 10  4  4  4  4  1  1  9  4 10  7
## [126] 10 10  4  1  1 10 10  4 10  4  4  1  1  1  1  1  1 10  9  1  1  1  1  1  1
## [151]  1  1  1  5  5  1  5  5  5  5  5  5  5  1  1  9  2  9  8  9  9  9  1  9  9
## [176]  1  1  5  5  5  5  5  5  5  5  5  5  5  5  5  5  5
neworder<-order(cl.labs)
neworder
##   [1]   1   2   3   4   5   6   7  28  81  91 120 121 129 130 137 138 139 140
##  [19] 141 142 145 146 147 148 149 150 151 152 153 156 164 165 173 176 177  19
##  [37]  48  50  52  53  54  55  56  57  61  62  63  65  66  67  68  69  70  71
##  [55]  72  73  74  76  77  78  79 167   8   9  23  25  30  31  32  37  38  40
##  [73]  41  82  85  87  88  92  93  94  95 105 106 107 110 116 117 118 119 123
##  [91] 128 133 135 136 154 155 157 158 159 160 161 162 163 178 179 180 181 182
## [109] 183 184 185 186 187 188 189 190 191 192  10  11  12  14  15  16  17  18
## [127]  20  21  22  24  26  27  29  33  34  35  36  39  42  43  44  45  47  86
## [145]  96 108 109 111 125  49  51  58  59  64  75 169  13  46  60 103 122 144
## [163] 166 168 170 171 172 174 175  80  83  84  89  90  97  98  99 100 101 102
## [181] 104 112 113 114 115 124 126 127 131 132 134 143
m<-t(matrix(order(neworder)[as.numeric(edges)],2))  # map edge endpoints to their positions in the class-sorted ordering
plot(1, 1, xlim = c(0, nv + 1), ylim = c(nv + 1, 0),
     type = "n", axes= FALSE, xlab="Classes",
     ylab="Classes",
     main="Reorganized Adjacency matrix")
rect(m[,2]-0.5,m[,1]-0.5,m[,2]+0.5,m[,1]+0.5,col=1)
rect(m[,1]-0.5,m[,2]-0.5,m[,1]+0.5,m[,2]+0.5,col=1)
cl.lim <- cl.cnts
cl.lim <- cumsum(cl.lim)[1:(length(cl.lim)-1)]+0.5
clip(0,nv+1,nv+1,0)
abline(v=c(0.5,cl.lim,nv+0.5),h=c(0.5,cl.lim,nv+0.5),col="red")

g.cl <- graph_from_adjacency_matrix(Pi.mat,
              mode="undirected",
              weighted=TRUE)

#Set necessary parameters
vsize <- 100*sqrt(alpha)
ewidth <- 10*E(g.cl)$weight
PolP <- V(fblog)$PolParty
class.by.PolP <- as.matrix(table(cl.labs,PolP))
pie.vals <- lapply(1:Q, function(i)
         as.vector(class.by.PolP[i,]))
my.cols <- topo.colors(length(unique(PolP)))

# Plot
plot(g.cl, edge.width=ewidth,
vertex.shape="pie", vertex.pie=pie.vals,
vertex.pie.color=list(my.cols),
vertex.size=vsize, vertex.label.dist=0.1*vsize,
vertex.label.degree=pi)


# Add a legend
my.names <- names(table(PolP))
my.names[2] <- "Comm. Anal."
my.names[5] <- "PR de G"
legend(x="topleft", my.names,
      fill=my.cols, bty="n")

Latent Variable Network Models

Usage of the R package eigenmodel:

library(eigenmodel)
data(lazega)
plot(lazega,vertex.size=3,vertex.label=NA)

summary(lazega)
## IGRAPH 3e8b2bf UN-- 36 115 -- 
## + attr: name (v/c), Seniority (v/n), Status (v/n), Gender (v/n), Office
## | (v/n), Years (v/n), Age (v/n), Practice (v/n), School (v/n)
vcount(lazega)
## [1] 36
ecount(lazega)
## [1] 115
# First fit a model with no pair-specific covariates and a latent feature space of dimension R = 2
set.seed(42)
A <-get.adjacency(lazega,sparse = FALSE)
lazega.leig.fit1<-eigenmodel_mcmc(A,R=2,S=11000,burn=10000)  # S (capital) is the number of MCMC scans
## (MCMC progress messages omitted)
summary(lazega.leig.fit1)
##              Length Class  Mode   
## Z_postmean   1296   -none- numeric
## ULU_postmean 1296   -none- numeric
## Y_postmean   1296   -none- numeric
## L_postsamp   2000   -none- numeric
## b_postsamp      0   -none- numeric
## Y            1296   -none- numeric
## X               0   -none- logical
## S               1   -none- numeric
# Model with a pair covariate for shared practice: the outer product of an attribute vector
# with itself lies in c(1,4,9) exactly when the two values match (attributes coded 1, 2, 3)
same.prac.op<-v.attrs$Practice %o% v.attrs$Practice
same.prac<-matrix(as.numeric(same.prac.op %in% c(1,4,9)),36,36)
same.prac<-array(same.prac,dim=c(36,36,1))
lazega.leig.fit2<-eigenmodel_mcmc(A,same.prac,R=2,S=11000,burn=10000)
## (MCMC progress messages omitted)
summary(lazega.leig.fit2)
##              Length Class  Mode   
## Z_postmean   1296   -none- numeric
## ULU_postmean 1296   -none- numeric
## Y_postmean   1296   -none- numeric
## L_postsamp   2000   -none- numeric
## b_postsamp   1000   -none- numeric
## Y            1296   -none- numeric
## X            1296   -none- numeric
## S               1   -none- numeric
# Model with a pair covariate for shared office location (same coding trick as above)
same.off.op<-v.attrs$Office %o% v.attrs$Office
same.off<-matrix(as.numeric(same.off.op %in% c(1,4,9)),36,36)
same.off<-array(same.off,dim=c(36,36,1))
lazega.leig.fit3<-eigenmodel_mcmc(A,same.off,R=2,S=11000,burn=10000)
## (MCMC progress messages omitted)
summary(lazega.leig.fit3)
##              Length Class  Mode   
## Z_postmean   1296   -none- numeric
## ULU_postmean 1296   -none- numeric
## Y_postmean   1296   -none- numeric
## L_postsamp   2000   -none- numeric
## b_postsamp   1000   -none- numeric
## Y            1296   -none- numeric
## X            1296   -none- numeric
## S               1   -none- numeric
# Extract the leading two eigenvectors of the posterior mean of U Lambda U' for each model
lat.sp.1 <-eigen(lazega.leig.fit1$ULU_postmean)$vec[, 1:2]
lat.sp.2 <-eigen(lazega.leig.fit2$ULU_postmean)$vec[, 1:2]
lat.sp.3 <-eigen(lazega.leig.fit3$ULU_postmean)$vec[, 1:2]
lat.sp.1
##              [,1]         [,2]
##  [1,]  0.02990368 -0.031097080
##  [2,]  0.12360901  0.046985526
##  [3,] -0.16416298  0.182068548
##  [4,]  0.09063237  0.017435163
##  [5,] -0.26053965  0.068890133
##  [6,] -0.15711409 -0.270987310
##  [7,] -0.07193516  0.080802113
##  [8,] -0.01395859 -0.237595036
##  [9,]  0.21582273 -0.156326519
## [10,]  0.07036050 -0.104364493
## [11,]  0.04858516  0.157267661
## [12,]  0.35216932  0.100492212
## [13,] -0.17149222 -0.308880863
## [14,] -0.04743153 -0.148354846
## [15,]  0.12257371  0.149584940
## [16,]  0.23934799 -0.113353648
## [17,]  0.15801340  0.066549418
## [18,] -0.34021549  0.449327135
## [19,]  0.09727958  0.184912472
## [20,]  0.07641420  0.047362028
## [21,]  0.02233143  0.128481438
## [22,]  0.05503723  0.014035518
## [23,] -0.01509686  0.058716088
## [24,] -0.01094847 -0.006771444
## [25,] -0.08005897  0.102026813
## [26,]  0.15811406  0.136272979
## [27,]  0.07765744  0.102536706
## [28,] -0.18489696  0.036517684
## [29,]  0.29367451 -0.272512136
## [30,] -0.14978291 -0.306589695
## [31,] -0.24227728 -0.270425047
## [32,] -0.13930505 -0.068374926
## [33,] -0.26463662 -0.075627043
## [34,]  0.24060855  0.055454459
## [35,] -0.13712502  0.193063343
## [36,]  0.06173106  0.059088983
lat.sp.2 
##               [,1]         [,2]
##  [1,]  0.057711133  0.173485117
##  [2,]  0.144886135 -0.023346133
##  [3,] -0.189608493  0.288087872
##  [4,]  0.068560475  0.065738514
##  [5,] -0.229062736 -0.206996594
##  [6,] -0.165320717  0.189243578
##  [7,] -0.073769818 -0.070852528
##  [8,] -0.006091445  0.097218277
##  [9,]  0.176968594 -0.090243789
## [10,]  0.051698324 -0.008885727
## [11,]  0.052084297  0.111192400
## [12,]  0.297478371  0.038088404
## [13,] -0.123518524 -0.188703947
## [14,] -0.088101270  0.083331941
## [15,]  0.133216122 -0.134831770
## [16,]  0.242061990 -0.158824063
## [17,]  0.119187788  0.190018881
## [18,] -0.326001664 -0.255532637
## [19,]  0.060129224  0.127161379
## [20,]  0.120867474 -0.090243149
## [21,]  0.059532775 -0.092899024
## [22,]  0.094655477 -0.093065156
## [23,]  0.006996613 -0.125904245
## [24,]  0.012601496 -0.023479732
## [25,] -0.152784964  0.292253453
## [26,]  0.200303784 -0.054452753
## [27,]  0.132090158 -0.107867756
## [28,] -0.301112703  0.294538943
## [29,]  0.265564083  0.173707080
## [30,] -0.158435195  0.204056053
## [31,] -0.246424516 -0.151164826
## [32,] -0.130794472 -0.072848443
## [33,] -0.200179290 -0.363788043
## [34,]  0.196815063  0.200724780
## [35,] -0.188564845  0.052546448
## [36,]  0.119860653 -0.243663125
lat.sp.3
##               [,1]         [,2]
##  [1,] -0.054272080  0.044301391
##  [2,] -0.114262277 -0.299000372
##  [3,] -0.060437136 -0.254056367
##  [4,]  0.006515628  0.088965766
##  [5,]  0.306042224 -0.040487007
##  [6,]  0.078912335  0.307303693
##  [7,] -0.019400474 -0.361393520
##  [8,]  0.005909152 -0.209180189
##  [9,] -0.256369307 -0.025834488
## [10,]  0.068190419  0.059664909
## [11,] -0.070218270  0.025274076
## [12,] -0.270505782  0.137007419
## [13,]  0.326984921 -0.075803462
## [14,] -0.180703427 -0.038478428
## [15,] -0.067544842  0.285798408
## [16,] -0.197142311  0.108526416
## [17,] -0.221780160  0.070900415
## [18,]  0.181909143 -0.377306879
## [19,] -0.062534144  0.174923680
## [20,]  0.023005938  0.136492606
## [21,]  0.051128653  0.031953532
## [22,]  0.037897851  0.066741694
## [23,]  0.031826927 -0.117734761
## [24,]  0.164445673  0.271546946
## [25,] -0.193421672 -0.206773293
## [26,] -0.053895191  0.155786521
## [27,]  0.037784780  0.003844418
## [28,] -0.002126842  0.016407049
## [29,] -0.238330192 -0.024586262
## [30,] -0.029502635  0.151097136
## [31,]  0.354223437  0.114292001
## [32,]  0.128986400  0.142452742
## [33,]  0.395196234 -0.052593844
## [34,] -0.174296689  0.048256544
## [35,]  0.002393447 -0.042605284
## [36,]  0.040044875  0.143938044
# Plot the network using these latent coordinates as layouts
colbar <- c("red", "dodgerblue", "goldenrod")
v.colors <- colbar[V(lazega)$Office]
v.shapes <- c("circle", "square")[V(lazega)$Practice]
v.size <- 3.5*sqrt(V(lazega)$Years)
v.label <- V(lazega)$Seniority
plot(lazega, layout=lat.sp.1, vertex.color=v.colors,
    vertex.shape=v.shapes, vertex.size=v.size,
    vertex.label=(1:36))

plot(lazega, layout=lat.sp.2, vertex.color=v.colors,
    vertex.shape=v.shapes, vertex.size=v.size,
    vertex.label=(1:36))

plot(lazega, layout=lat.sp.3, vertex.color=v.colors,
    vertex.shape=v.shapes, vertex.size=v.size,
    vertex.label=(1:36))

Goodness of Fit

Perform k-fold cross-validation (here k = 5) on the lazega data:

# Work with the lower triangle of the adjacency matrix (one entry per dyad)
perm.index <- sample(1:630)        # random permutation of the 630 dyads
nfolds <- 5
nmiss <- 630/nfolds                # number of dyads held out in each fold
Avec <- A[lower.tri(A)]
Avec.pred1 <- numeric(length(Avec))


for(i in seq(1,nfolds)){
  # Indices of the dyads held out in this fold.
  miss.index <- seq(((i-1) * nmiss + 1), (i*nmiss), 1)
  A.miss.index <- perm.index[miss.index]

  # Fill a new Atemp appropriately with NAs.
  Avec.temp <- Avec
  Avec.temp[A.miss.index] <- NA
  Atemp <- matrix(0, 36, 36)
  Atemp[lower.tri(Atemp)] <- Avec.temp
  Atemp <- Atemp + t(Atemp)

  # Now fit the model and predict the held-out dyads.
  Y <- Atemp
  model1.fit <- eigenmodel_mcmc(Y, R=2,
                                S=11000, burn=10000)
  model1.pred <- model1.fit$Y_postmean
  model1.pred.vec <- model1.pred[lower.tri(model1.pred)]
  Avec.pred1[A.miss.index] <- model1.pred.vec[A.miss.index]
}
## (MCMC progress messages omitted)
# Plot the ROC curve for the cross-validated predictions
library(ROCR)
pred1 <- prediction(Avec.pred1, Avec)
perf1 <- performance(pred1, "tpr", "fpr")
plot(perf1, col="blue", lwd=3)

perf1.auc <- performance(pred1, "auc")
slot(perf1.auc, "y.values")
## [[1]]
## [1] 0.8133221