You’ll verify for yourself that PageRank works by performing calculations on a small universe of web pages.
Let’s use the 6 page universe that we had in the course notes. For this directed graph, perform the following calculations in R.
# A matrix
r1 <- c(0, 1/2, 1/2, 0, 0, 0)
r2 <- rep(0, 6)
r3 <- c(1/3, 1/3, 0, 0, 1/3, 0)
r4 <- c(0, 0, 0, 0, 1/2, 1/2)
r5 <- c(0, 0, 0, 1/2, 0, 1/2)
r6 <- c(0, 0, 0, 1, 0, 0)
(A <- matrix(c(r1, r2, r3, r4, r5, r6), 6, byrow = T))
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0000000 0.5000000 0.5 0.0 0.0000000 0.0
## [2,] 0.0000000 0.0000000 0.0 0.0 0.0000000 0.0
## [3,] 0.3333333 0.3333333 0.0 0.0 0.3333333 0.0
## [4,] 0.0000000 0.0000000 0.0 0.0 0.5000000 0.5
## [5,] 0.0000000 0.0000000 0.0 0.5 0.0000000 0.5
## [6,] 0.0000000 0.0000000 0.0 1.0 0.0000000 0.0
# Notice node 2 has no outlink, which is known as "dangling node". In order to stabilize the convergence we will address this by replacing the vectors in node 2 with 1/6, since there are six web pages and 1/6 is the the probability of having equal chance of landing in any of the 6 web pages.
r2_new <- rep(1/6, 6)
(A <- matrix(c(r1, r2_new, r3, r4, r5, r6), 6, byrow = T))
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0000000 0.5000000 0.5000000 0.0000000 0.0000000 0.0000000
## [2,] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## [3,] 0.3333333 0.3333333 0.0000000 0.0000000 0.3333333 0.0000000
## [4,] 0.0000000 0.0000000 0.0000000 0.0000000 0.5000000 0.5000000
## [5,] 0.0000000 0.0000000 0.0000000 0.5000000 0.0000000 0.5000000
## [6,] 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000
# introduce decay
d <- 0.85
n <- nrow(A)
(B <- d * A + (1 - d) / n)
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 0.0250000 0.4500000 0.4500000 0.0250000 0.0250000 0.0250000
## [2,] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## [3,] 0.3083333 0.3083333 0.0250000 0.0250000 0.3083333 0.0250000
## [4,] 0.0250000 0.0250000 0.0250000 0.0250000 0.4500000 0.4500000
## [5,] 0.0250000 0.0250000 0.0250000 0.4500000 0.0250000 0.4500000
## [6,] 0.0250000 0.0250000 0.0250000 0.8750000 0.0250000 0.0250000
library(matrixcalc)
# Uniform rank vector
r <- rep(1 / n, n)
# Ten iterations
matrix.power(t(B), 10) %*% r
## [,1]
## [1,] 0.05205661
## [2,] 0.07428990
## [3,] 0.05782138
## [4,] 0.34797267
## [5,] 0.19975859
## [6,] 0.26810085
# Twenty iterations
matrix.power(t(B), 20) %*% r
## [,1]
## [1,] 0.05170616
## [2,] 0.07368173
## [3,] 0.05741406
## [4,] 0.34870083
## [5,] 0.19990313
## [6,] 0.26859408
# Twenty iterations
matrix.power(t(B), 30) %*% r
## [,1]
## [1,] 0.05170475
## [2,] 0.07367927
## [3,] 0.05741242
## [4,] 0.34870367
## [5,] 0.19990381
## [6,] 0.26859607
# Thirty iterations
matrix.power(t(B), 40) %*% r
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
# Forty iterations
matrix.power(t(B), 50) %*% r
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
# convergence has occured by 40 iterations
convergence <- matrix.power(t(B), 40) %*% r
(eigen_d <- eigen(B))
## $values
## [1] 1.00000000 0.57619235 -0.42500001 -0.42499999 -0.34991524 -0.08461044
##
## $vectors
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.4082483 -0.7278031 -5.345224e-01 5.345225e-01 -0.795670150
## [2,] -0.4082483 -0.3721164 -5.216180e-09 -5.216180e-09 0.059710287
## [3,] -0.4082483 -0.5389259 5.345225e-01 -5.345225e-01 0.602762996
## [4,] -0.4082483 0.1174605 -2.672613e-01 2.672612e-01 0.002611877
## [5,] -0.4082483 0.1174605 -2.672613e-01 2.672612e-01 0.002611877
## [6,] -0.4082483 0.1174605 5.345225e-01 -5.345224e-01 0.002611877
## [,6]
## [1,] 0.486246420
## [2,] -0.673469294
## [3,] 0.556554233
## [4,] -0.009145393
## [5,] -0.009145393
## [6,] -0.009145393
# confirm the largest eigenvalue is 1
(eigen_d <- as.numeric(eigen(B)$values))
## [1] 1.00000000 0.57619235 -0.42500001 -0.42499999 -0.34991524 -0.08461044
(max(eigen_d))
## [1] 1
# vectors corresponding to eigenvalue 1
(eigen_d <- as.numeric(eigen(B)$vectors[,which.max(eigen(B)$values)]))
## [1] -0.4082483 -0.4082483 -0.4082483 -0.4082483 -0.4082483 -0.4082483
# normalize
(eigen_nor <- (1/sum(eigen_d))*eigen_d)
## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
# sum of normalize eigen vectors
(sum(eigen_nor))
## [1] 1
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:matrixcalc':
##
## %s%
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
A <- matrix(c(r1, r2, r3, r4, r5, r6), 6, byrow = T)
graph_A <- graph_from_adjacency_matrix(A, weighted=TRUE)
plot(graph_A)
# verification after rounded to 5th decimal point
(pageRank <- as.matrix(page.rank(graph_A)$vector))
## [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608
round(convergence, 5) == round(pageRank, 5)
## [,1]
## [1,] TRUE
## [2,] TRUE
## [3,] TRUE
## [4,] TRUE
## [5,] TRUE
## [6,] TRUE