You’ll verify for yourself that PageRank works by performing calculations on a small universe of web pages. Let’s use the 6-page universe that we had in the course notes. For this directed graph, perform the following calculations in R. Form the A matrix. Then, introduce decay and form the B matrix as we did in the course notes.
# Raw link matrix for the 6-page universe, written one row at a time.
# Entry [i, j] is the weight page j passes to page i, so each column holds
# the out-link weights of one page.
(A <- rbind(c(0,   0, 1/3, 0,   0,   0),
            c(1/2, 0, 1/3, 0,   0,   0),
            c(1/2, 0, 0,   0,   0,   0),
            c(0,   0, 0,   0,   1/2, 1),
            c(0,   0, 1/3, 1/2, 0,   0),
            c(0,   0, 0,   1/2, 1/2, 0)))
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0 0 0.3333333 0.0 0.0 0
## [2,] 0.5 0 0.3333333 0.0 0.0 0
## [3,] 0.5 0 0.0000000 0.0 0.0 0
## [4,] 0.0 0 0.0000000 0.0 0.5 1
## [5,] 0.0 0 0.3333333 0.5 0.0 0
## [6,] 0.0 0 0.0000000 0.5 0.5 0
# check the column sums of A; a column that sums to 0 marks a dangling node
colSums(A)
## [1] 1 0 1 1 1 1
# Column 2 (the 'dangling node') is all zeros, so the columns of A do not all
# sum to one. Replace that column with a uniform 1/6 so that every column
# sums to one and the iteration converges properly.
# Each argument below is one column of A.
(A <- cbind(c(0, 1/2, 1/2, 0, 0, 0),
            rep(1/6, 6),
            c(1/3, 1/3, 0, 0, 1/3, 0),
            c(0, 0, 0, 0, 1/2, 1/2),
            c(0, 0, 0, 1/2, 0, 1/2),
            c(0, 0, 0, 1, 0, 0)))
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0 0.1666667 0.3333333 0.0 0.0 0
## [2,] 0.5 0.1666667 0.3333333 0.0 0.0 0
## [3,] 0.5 0.1666667 0.0000000 0.0 0.0 0
## [4,] 0.0 0.1666667 0.0000000 0.0 0.5 1
## [5,] 0.0 0.1666667 0.3333333 0.5 0.0 0
## [6,] 0.0 0.1666667 0.0000000 0.5 0.5 0
# re-check the column sums after patching the dangling node
colSums(A)
## [1] 1 1 1 1 1 1
# introduce decay: scale the link matrix by 0.85 and add 0.15/6 (= 0.025) to
# every entry
(B <- 0.85 * A + 0.15/6)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.025 0.1666667 0.3083333 0.025 0.025 0.025
## [2,] 0.450 0.1666667 0.3083333 0.025 0.025 0.025
## [3,] 0.450 0.1666667 0.0250000 0.025 0.025 0.025
## [4,] 0.025 0.1666667 0.0250000 0.025 0.450 0.875
## [5,] 0.025 0.1666667 0.3083333 0.450 0.025 0.025
## [6,] 0.025 0.1666667 0.0250000 0.450 0.450 0.025
Start with a uniform rank vector r and perform power iterations on B until convergence. That is, compute the solution \(r = B^n \times r\). Attempt this for a sufficiently large n so that r actually converges.
# uniform starting rank vector: each of the 6 pages gets weight 1/6
r <- rep(1/6, 6)
library(expm)
## Loading required package: Matrix
##
## Attaching package: 'expm'
## The following object is masked from 'package:Matrix':
##
## expm
# Power iteration: raise B to successive powers until one more multiplication
# by B no longer changes the rank vector (within all.equal()'s tolerance).
# The loop gives up once i reaches 50.
convergence <- FALSE
i <- 1
while (!convergence && i < 50) {
  # %^% (matrix power) is evaluated before %*%; the parentheses make that
  # grouping explicit.
  convergence <- isTRUE(all.equal((B %^% i) %*% r, (B %^% (i + 1)) %*% r))
  i <- i + 1
}
# i is incremented after the comparison that succeeded, so the value printed
# is the larger of the two exponents that were compared.
print(i)
## [1] 32
# the loop reported i = 32: confirm that B^32 r and B^33 r agree, i.e. the
# rank vector has converged after 32 iterations
(isTRUE(all.equal(B %^% 32 %*% r, B %^% 33 %*% r)))
## [1] TRUE
Compute the eigen-decomposition of B and verify that you indeed get an eigenvalue of 1 as the largest eigenvalue and that its corresponding eigenvector is the same vector that you obtained in the previous power iteration method. Further, this eigenvector has all positive entries and it sums to 1.
# eigen-decomposition of B: $values holds the eigenvalues and $vectors holds
# one eigenvector per column, in the same order
(eigen.val <- eigen(B))
## $values
## [1] 1.00000000+0i 0.57619235+0i -0.42500000+0i -0.42500000-0i
## [5] -0.34991524+0i -0.08461044+0i
##
## $vectors
## [,1] [,2] [,3]
## [1,] 0.1044385+0i 0.2931457+0i 2.945054e-15+5.507002e-22i
## [2,] 0.1488249+0i 0.5093703+0i -1.223015e-15-0.000000e+00i
## [3,] 0.1159674+0i 0.3414619+0i -2.241513e-15-6.032865e-22i
## [4,] 0.7043472+0i -0.5890805+0i -7.071068e-01+0.000000e+00i
## [5,] 0.4037861+0i -0.1413606+0i 7.071068e-01+0.000000e+00i
## [6,] 0.5425377+0i -0.4135367+0i 0.000000e+00-2.145851e-08i
## [,4] [,5] [,6]
## [1,] 2.945054e-15-5.507002e-22i -0.06471710+0i -0.212296003+0i
## [2,] -1.223015e-15+0.000000e+00i 0.01388698+0i 0.854071294+0i
## [3,] -2.241513e-15+6.032865e-22i 0.07298180+0i -0.363638739+0i
## [4,] -7.071068e-01+0.000000e+00i -0.66058664+0i 0.018399984+0i
## [5,] 7.071068e-01-0.000000e+00i 0.73761812+0i -0.304719509+0i
## [6,] 0.000000e+00+2.145851e-08i -0.09918316+0i 0.008182973+0i
# the largest eigenvalue is 1
# eigen() returned complex values here, and which.max() warns when it has to
# discard imaginary parts itself, so rank the eigenvalues by their real parts.
max.value <- eigen.val$values[which.max(Re(eigen.val$values))]
(isTRUE(all.equal(Re(max.value), 1)))
## [1] TRUE
# eigenvector that belongs to the largest eigenvalue; the eigenvalues are
# ranked by real part so that which.max() does not warn about complex input
eigen.vec <- eigen.val$vectors[, which.max(Re(eigen.val$values))]
# rescale so the entries sum to 1, then keep the real parts
eigen.vec <- Re(eigen.vec / sum(eigen.vec))
# the rescaled eigenvector matches the rank vector from the power iteration
# (as.vector() flattens the 6 x 1 product to a plain vector for comparison)
(isTRUE(all.equal(as.vector((B %^% 32) %*% r), eigen.vec)))
## [1] TRUE
# this eigenvector has all positive entries
# (a zero entry is not positive either, so the guard uses <= rather than <)
if (any(eigen.vec <= 0)) {
  stop("the eigenvector has non-positive entries", call. = FALSE)
}
# sums to 1
isTRUE(all.equal(sum(eigen.vec), 1))
## [1] TRUE
Use the igraph package in R and its page.rank method to compute the PageRank of the graph as given in A. Note that you don’t need to apply decay. The package starts with a connected graph and applies decay internally. Verify that you do get the same PageRank vector as the two approaches above.
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# re-create A without the dangling-node patch or decay, this time laid out as
# an adjacency matrix: row i holds the out-link weights of page i (this is
# the transpose of the first A above)
(A <- rbind(c(0,   1/2, 1/2, 0,   0,   0),
            rep(0, 6),
            c(1/3, 1/3, 0,   0,   1/3, 0),
            c(0,   0,   0,   0,   1/2, 1/2),
            c(0,   0,   0,   1/2, 0,   1/2),
            c(0,   0,   0,   1,   0,   0)))
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0000000 0.5000000 0.5 0.0 0.0000000 0.0
## [2,] 0.0000000 0.0000000 0.0 0.0 0.0000000 0.0
## [3,] 0.3333333 0.3333333 0.0 0.0 0.3333333 0.0
## [4,] 0.0000000 0.0000000 0.0 0.0 0.5000000 0.5
## [5,] 0.0000000 0.0000000 0.0 0.5 0.0000000 0.5
## [6,] 0.0000000 0.0000000 0.0 1.0 0.0000000 0.0
# build a directed graph from the adjacency matrix, keeping the non-zero
# entries as edge weights
g <- graph_from_adjacency_matrix(A, weighted = TRUE, mode = "directed")
plot(g)
# page_rank() is the current igraph name for the deprecated page.rank()
page_rank(g)$vector
## [1] 0.05170475 0.07367926 0.05741241 0.34870369 0.19990381 0.26859608
# compare to the eigenvector method
# (page_rank() is the current igraph name for the deprecated page.rank())
(isTRUE(all.equal(eigen.vec, page_rank(g)$vector)))
## [1] TRUE
# compare to the power iteration method
# (page_rank() is the current igraph name for the deprecated page.rank();
# as.vector() flattens the 6 x 1 product to a plain vector)
(isTRUE(all.equal(as.vector((B %^% 32) %*% r), page_rank(g)$vector)))
## [1] TRUE
Reference:
https://rstudio-pubs-static.s3.amazonaws.com/122129_2fea566719494d9b830d26ce210c40e0.html
http://rstudio-pubs-static.s3.amazonaws.com/223356_ae2392ea92fa49b3b3c9e8dcc2b240a5.html
https://github.com/wwells/CUNY_DATA_605/blob/master/Week10/WWells_Assign10.Rmd