library(igraph) # used to compute the Page Rank of the n square matrix AYou’ll verify for yourself that PageRank works by performing calculations on a small universe of web pages.
Let’s use the 6 page universe that we had in the previous discussion For this directed graph, perform the following calculations in R.
a <- c(0 , 1/2, 1/2, 0, 0, 0,
0 , 0, 0, 0, 0, 0,
1/3, 1/3, 0, 0, 1/3, 0,
0 , 0, 0, 0, 1/2, 1/2,
0 , 0, 0, 1/2, 0, 1/2,
0 , 0, 0, 1, 0, 0)
n <- 6 # Use 6 to correspond with the 6 URLs of our test universe
A <- matrix(data = a, nrow = n, ncol = n, byrow = TRUE)
# Print the A matrix
A#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 0.0000000 0.5000000 0.5 0.0 0.0000000 0.0
#> [2,] 0.0000000 0.0000000 0.0 0.0 0.0000000 0.0
#> [3,] 0.3333333 0.3333333 0.0 0.0 0.3333333 0.0
#> [4,] 0.0000000 0.0000000 0.0 0.0 0.5000000 0.5
#> [5,] 0.0000000 0.0000000 0.0 0.5 0.0000000 0.5
#> [6,] 0.0000000 0.0000000 0.0 1.0 0.0000000 0.0
\(B = d \times A + \frac{(1-d)}{n}\)
d <- 0.85 # chosen decay d = 0.85 (a.k.a damping factor)
B1 <- d * A + ( 1 - d ) / n
B1#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 0.0250000 0.4500000 0.450 0.025 0.0250000 0.025
#> [2,] 0.0250000 0.0250000 0.025 0.025 0.0250000 0.025
#> [3,] 0.3083333 0.3083333 0.025 0.025 0.3083333 0.025
#> [4,] 0.0250000 0.0250000 0.025 0.025 0.4500000 0.450
#> [5,] 0.0250000 0.0250000 0.025 0.450 0.0250000 0.450
#> [6,] 0.0250000 0.0250000 0.025 0.875 0.0250000 0.025
pageRank <- function(B, MaxIterations = 100) {
urls <- dim(B)[1]
kth_power_B <- B
i <- 0
converged_i <- TRUE
converged <- FALSE
# define initial uniform rank vector r
r <- r_f <- matrix( rep( 1 / urls, urls ), nrow = urls, ncol = 1)
#while ( (converged != TRUE) & (i < MaxIterations) ) {
while ( (converged != converged_i) & (i < MaxIterations) ) {
# perform a new iteration because r_f has not yet converged
converged_i <- converged
# new ith r vector
r_i <- r
# matrix multiplication
kth_power_B < kth_power_B %*% B
# probability of finding a user on a page/url
r <- kth_power_B %*% r
r_f <- r
# increase iteration counter
i <- i + 1
# check if the new r_f vector converged to the previous ith r vector
converged <- all.equal(r_i, r_f)
}
# after the r_f converged, normalize it
r <- r_f / sqrt( sum( r_f ^ 2 ) )
output <- list("Converged" = converged,
"Iterations" = i,
"r" = r,
"Stabilized_B" = kth_power_B)
return(output)
}r_ranking <- pageRank(B1, 100)After \(n = 23\) iterations, the converged \(r_f\) vector is:
r_ranking["r"]#> $r
#> [,1]
#> [1,] 0.2159123
#> [2,] 0.0572329
#> [3,] 0.2980792
#> [4,] 0.5358032
#> [5,] 0.5358032
#> [6,] 0.5358032
The stabilized B matrix is:
r_ranking["Stabilized_B"]#> $Stabilized_B
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 0.0250000 0.4500000 0.450 0.025 0.0250000 0.025
#> [2,] 0.0250000 0.0250000 0.025 0.025 0.0250000 0.025
#> [3,] 0.3083333 0.3083333 0.025 0.025 0.3083333 0.025
#> [4,] 0.0250000 0.0250000 0.025 0.025 0.4500000 0.450
#> [5,] 0.0250000 0.0250000 0.025 0.450 0.0250000 0.450
#> [6,] 0.0250000 0.0250000 0.025 0.875 0.0250000 0.025
eigen_decomp <- eigen(B1)
eigen_decomp#> eigen() decomposition
#> $values
#> [1] 0.95165271 -0.42500000 -0.42500000 0.41436582 -0.34733611 -0.01868242
#>
#> $vectors
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -0.2159123 5.345225e-01 5.345225e-01 -0.77869913 -0.775079691
#> [2,] -0.0572329 -3.580707e-17 -3.124265e-17 -0.07537631 0.009090375
#> [3,] -0.2980792 -5.345225e-01 -5.345225e-01 -0.61034825 0.631781588
#> [4,] -0.5358032 2.672612e-01 2.672612e-01 0.07169632 0.002637034
#> [5,] -0.5358032 2.672612e-01 2.672612e-01 0.07169632 0.002637034
#> [6,] -0.5358032 -5.345225e-01 -5.345225e-01 0.07169632 0.002637034
#> [,6]
#> [1,] -0.55060201
#> [2,] 0.61511270
#> [3,] -0.56386946
#> [4,] 0.01322899
#> [5,] 0.01322899
#> [6,] 0.01322899
maxEigenVal <- max(eigen_decomp$values)
maxEigenVal#> [1] 0.9516527
The maximum value of the eigen-decomposition is not \(1\), but it is close to it.
eigenvec <- eigen_decomp$vectors[,1]
comp_vecs <- data.frame(
"r" = r_ranking$r,
"eigenvector" = eigenvec
)
comp_vecs#> r eigenvector
#> 1 0.2159123 -0.2159123
#> 2 0.0572329 -0.0572329
#> 3 0.2980792 -0.2980792
#> 4 0.5358032 -0.5358032
#> 5 0.5358032 -0.5358032
#> 6 0.5358032 -0.5358032
We notice that the values of the eigenvector are all negative but they have the same magnitude as the values of the \(r\) vector of the power iteration method.
# sum all elements of the eigenvector
sum(eigenvec)#> [1] -2.178634
# sum all elements of the eigenvector in normalized form
sum(1/sum(eigenvec) * eigenvec)#> [1] 1
From the results above, we can see that all the elements of the eigenvector are negative and do not add up to \(1\). However, we can see that the normalized version of the eigenvector does add up to \(1\).
A_graph <- igraph::graph_from_adjacency_matrix( adjmatrix = A, mode = "directed", weighted = TRUE)plot(A_graph)igraphPageRank <- igraph::page.rank(A_graph)
as.matrix(igraphPageRank$vector)#> [,1]
#> [1,] 0.05170475
#> [2,] 0.07367926
#> [3,] 0.05741241
#> [4,] 0.34870369
#> [5,] 0.19990381
#> [6,] 0.26859608
comp_vecs <- data.frame(
"r" = r_ranking$r,
"eigenvector" = eigenvec,
"igraph_vector" = igraphPageRank$vector
)
comp_vecs#> r eigenvector igraph_vector
#> 1 0.2159123 -0.2159123 0.05170475
#> 2 0.0572329 -0.0572329 0.07367926
#> 3 0.2980792 -0.2980792 0.05741241
#> 4 0.5358032 -0.5358032 0.34870369
#> 5 0.5358032 -0.5358032 0.19990381
#> 6 0.5358032 -0.5358032 0.26859608
From the results above, we can see that the PageRank vector is not the same as the vectors obtained in the two previous approaches above.