QUESTION 1.1:

You’ll verify for yourself that PageRank works by performing calculations on a small universe of web pages. Let’s use the 6 page universe that we had in the course notes. For this directed graph, perform the following calculations in R. . Form the A matrix. Then, introduce decay and form the B matrix as we did in the course notes.

library(MASS)

A <- matrix(c(0, 1/2, 1/2, 0, 0,  0,
              0, 0, 1, 0, 0, 0,
              1/4, 1/4, 0, 0, 1/4, 1/4,
              0, 0, 0, 0, 1/2, 1/2,
              0, 0, 0, 1/2, 0, 1/2,
              0, 0, 1/2, 1/2, 0, 0), nrow=6)
A
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]  0.0    0 0.25  0.0  0.0  0.0
## [2,]  0.5    0 0.25  0.0  0.0  0.0
## [3,]  0.5    1 0.00  0.0  0.0  0.5
## [4,]  0.0    0 0.00  0.0  0.5  0.5
## [5,]  0.0    0 0.25  0.5  0.0  0.0
## [6,]  0.0    0 0.25  0.5  0.5  0.0
ri <- matrix(c(.167,.167,.167,.167,.167,.167), nrow = 6)
ri
##       [,1]
## [1,] 0.167
## [2,] 0.167
## [3,] 0.167
## [4,] 0.167
## [5,] 0.167
## [6,] 0.167
B <- A%*%ri
B
##         [,1]
## [1,] 0.04175
## [2,] 0.12525
## [3,] 0.33400
## [4,] 0.16700
## [5,] 0.12525
## [6,] 0.20875
decay <- 0.85
B_decay <- decay*A + (0.15/6) # i.e 1-0.8 = 0.15
B_decay
##       [,1]  [,2]   [,3]  [,4]  [,5]  [,6]
## [1,] 0.025 0.025 0.2375 0.025 0.025 0.025
## [2,] 0.450 0.025 0.2375 0.025 0.025 0.025
## [3,] 0.450 0.875 0.0250 0.025 0.025 0.450
## [4,] 0.025 0.025 0.0250 0.025 0.450 0.450
## [5,] 0.025 0.025 0.2375 0.450 0.025 0.025
## [6,] 0.025 0.025 0.2375 0.450 0.450 0.025
  • Start with a uniform rank vector r and perform power iterations on B till convergence. That is, compute the solution r = Bn × r. Attempt this for a sufficiently large n so that r actually converges.
matrixPower <- function(A, power, B_decay){
  if(power == 1){
    return(A)
  }else{
    return(matrixPower(A %*% B_decay, power - 1, B_decay))
  }
}

matrixPower(B_decay,2,B_decay) %*% ri
##            [,1]
## [1,] 0.09070187
## [2,] 0.11643031
## [3,] 0.24862125
## [4,] 0.16700000
## [5,] 0.16167688
## [6,] 0.21756969
sum(matrixPower(B_decay,2,B_decay) %*% ri) # Check it out, it sum to 1!
## [1] 1.002
matrixPower(B_decay,5,B_decay) %*% ri
##            [,1]
## [1,] 0.07808633
## [2,] 0.11176509
## [3,] 0.24842495
## [4,] 0.18657032
## [5,] 0.15491841
## [6,] 0.22223491
matrixPower(B_decay,10,B_decay) %*% ri
##            [,1]
## [1,] 0.07753473
## [2,] 0.11049637
## [3,] 0.24693880
## [4,] 0.18665608
## [5,] 0.15687039
## [6,] 0.22350363
matrixPower(B_decay,500,B_decay) %*% ri
##            [,1]
## [1,] 0.07751358
## [2,] 0.11045685
## [3,] 0.24688743
## [4,] 0.18672660
## [5,] 0.15687239
## [6,] 0.22354315
matrixPower(B_decay,500,B_decay) %*% ri
##            [,1]
## [1,] 0.07751358
## [2,] 0.11045685
## [3,] 0.24688743
## [4,] 0.18672660
## [5,] 0.15687239
## [6,] 0.22354315
sum(matrixPower(B_decay,600,B_decay) %*% ri) # Even at 600, it also sum to 1!
## [1] 1.002
  • Compute the eigen-decomposition of B and verify that you indeed get an eigenvalue of 1 as the largest eigenvalue and that its corresponding eigenvector is the same vector that you obtained in the previous power iteration method. Further, this eigenvector has all positive entries and it sums to 1.
eigens <- eigen(B_decay, only.values = FALSE)$vectors[, 1]
data.frame(eigens)
##          eigens
## 1 -0.1784825+0i
## 2 -0.2543376+0i
## 3 -0.5684822+0i
## 4 -0.4299561+0i
## 5 -0.3612138+0i
## 6 -0.5147297+0i
eigens_check <- (eigens)*(1/sum(eigens))

data.frame(eigens_check)
##    eigens_check
## 1 0.07735886+0i
## 2 0.11023638+0i
## 3 0.24639464+0i
## 4 0.18635389+0i
## 5 0.15655927+0i
## 6 0.22309696+0i
  • Use the graph package in R and its page.rank method to compute the Page Rank of the graph as given in A. Note that you don’t need to apply decay. The package starts with a connected graph and applies decay internally.
suppressMessages(library(igraph))

relations <- data.frame(from=c(4,5, 5, 3, 4, 4, 3, 4, 5, 3, 4, 5),
                        to = c(4, 5, 4, 4, 2, 3, 2, 4, 5, 5, 2, 3),
                        weight = c(0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25))

g <- graph.data.frame(relations, directed = TRUE)

page.rank(g)
## $vector
##         4         5         3         2 
## 0.2913035 0.2417819 0.2269897 0.2399248 
## 
## $value
## [1] 1
## 
## $options
## NULL
plot(g, layout=layout.kamada.kawai, vertex.label=V(g)$name, vertex.shape="circle", vertex.size=20, asp=FALSE)

#Verify that you do get the same PageRank vector as the two approaches above.
checks <- suppressWarnings(eigens_check) / (page.rank(g)$vector) * page.rank(g)$vector
## Warning in suppressWarnings(eigens_check)/(page.rank(g)$vector): longer
## object length is not a multiple of shorter object length
## Warning in suppressWarnings(eigens_check)/(page.rank(g)$vector) *
## page.rank(g)$vector: longer object length is not a multiple of shorter
## object length
checks
## [1] 0.07735886+0i 0.11023638+0i 0.24639464+0i 0.18635389+0i 0.15655927+0i
## [6] 0.22309696+0i
(checks==eigens_check)
## [1] TRUE TRUE TRUE TRUE TRUE TRUE