DATA 605 Assignment 10

Playing with PageRank

You’ll verify for yourself that PageRank works by performing calculations on a small universe of web pages.

Let’s use the 6 page universe that we had in the course notes. For this directed graph, perform the following calculations in R.

Form the A matrix. Then, introduce decay and form the B matrix as we did in the course notes.

# A matrix
r1 <- c(0, 1/2, 1/2, 0, 0, 0)
r2 <- rep(0, 6)
r3 <- c(1/3, 1/3, 0, 0, 1/3, 0)
r4 <- c(0, 0, 0, 0, 1/2, 1/2)
r5 <- c(0, 0, 0, 1/2, 0, 1/2)
r6 <- c(0, 0, 0, 1, 0, 0)

(A <-  matrix(c(r1, r2, r3, r4, r5, r6), 6, byrow = T))

##           [,1]      [,2] [,3] [,4]      [,5] [,6]
## [1,] 0.0000000 0.5000000  0.5  0.0 0.0000000  0.0
## [2,] 0.0000000 0.0000000  0.0  0.0 0.0000000  0.0
## [3,] 0.3333333 0.3333333  0.0  0.0 0.3333333  0.0
## [4,] 0.0000000 0.0000000  0.0  0.0 0.5000000  0.5
## [5,] 0.0000000 0.0000000  0.0  0.5 0.0000000  0.5
## [6,] 0.0000000 0.0000000  0.0  1.0 0.0000000  0.0

# Notice node 2 has no outlink, which is known as "dangling node". In order to stabilize the convergence we will address this by replacing the vectors in node 2 with 1/6, since there are six web pages and 1/6 is the the probability of having equal chance of landing in any of the 6 web pages.

r2_new <- rep(1/6, 6)

(A <-  matrix(c(r1, r2_new, r3, r4, r5, r6), 6, byrow = T))

##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]
## [1,] 0.0000000 0.5000000 0.5000000 0.0000000 0.0000000 0.0000000
## [2,] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## [3,] 0.3333333 0.3333333 0.0000000 0.0000000 0.3333333 0.0000000
## [4,] 0.0000000 0.0000000 0.0000000 0.0000000 0.5000000 0.5000000
## [5,] 0.0000000 0.0000000 0.0000000 0.5000000 0.0000000 0.5000000
## [6,] 0.0000000 0.0000000 0.0000000 1.0000000 0.0000000 0.0000000

# introduce decay
d <- 0.85
n <- nrow(A)
(B <- d * A + (1 - d) / n)

##           [,1]      [,2]      [,3]      [,4]      [,5]      [,6]
## [1,] 0.0250000 0.4500000 0.4500000 0.0250000 0.0250000 0.0250000
## [2,] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667
## [3,] 0.3083333 0.3083333 0.0250000 0.0250000 0.3083333 0.0250000
## [4,] 0.0250000 0.0250000 0.0250000 0.0250000 0.4500000 0.4500000
## [5,] 0.0250000 0.0250000 0.0250000 0.4500000 0.0250000 0.4500000
## [6,] 0.0250000 0.0250000 0.0250000 0.8750000 0.0250000 0.0250000

Start with a uniform rank vector r and perform power iterations on B till convergence. That is, compute the solution r = Bn × r. Attempt this for a sufficiently large n so that r actually converges.

library(matrixcalc)
# Uniform rank vector
r <- rep(1 / n, n)

# Ten iterations
 matrix.power(t(B), 10) %*% r

##            [,1]
## [1,] 0.05205661
## [2,] 0.07428990
## [3,] 0.05782138
## [4,] 0.34797267
## [5,] 0.19975859
## [6,] 0.26810085

# Twenty iterations
 matrix.power(t(B), 20) %*% r

##            [,1]
## [1,] 0.05170616
## [2,] 0.07368173
## [3,] 0.05741406
## [4,] 0.34870083
## [5,] 0.19990313
## [6,] 0.26859408

 # Twenty iterations
 matrix.power(t(B), 30) %*% r

##            [,1]
## [1,] 0.05170475
## [2,] 0.07367927
## [3,] 0.05741242
## [4,] 0.34870367
## [5,] 0.19990381
## [6,] 0.26859607

 # Thirty iterations
 matrix.power(t(B), 40) %*% r

##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608

 # Forty iterations
 matrix.power(t(B), 50) %*% r

##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608

 # convergence has occured by 40 iterations
 
 convergence <- matrix.power(t(B), 40) %*% r

Compute the eigen-decomposition of B and verify that you indeed get an eigenvalue of 1 as the largest eigenvalue and that its corresponding eigenvector is the same vector that you obtained in the previous power iteration method. Further, this eigenvector has all positive entries and it sums to 1.

(eigen_d <- eigen(B))

## $values
## [1]  1.00000000  0.57619235 -0.42500001 -0.42499999 -0.34991524 -0.08461044
## 
## $vectors
##            [,1]       [,2]          [,3]          [,4]         [,5]
## [1,] -0.4082483 -0.7278031 -5.345224e-01  5.345225e-01 -0.795670150
## [2,] -0.4082483 -0.3721164 -5.216180e-09 -5.216180e-09  0.059710287
## [3,] -0.4082483 -0.5389259  5.345225e-01 -5.345225e-01  0.602762996
## [4,] -0.4082483  0.1174605 -2.672613e-01  2.672612e-01  0.002611877
## [5,] -0.4082483  0.1174605 -2.672613e-01  2.672612e-01  0.002611877
## [6,] -0.4082483  0.1174605  5.345225e-01 -5.345224e-01  0.002611877
##              [,6]
## [1,]  0.486246420
## [2,] -0.673469294
## [3,]  0.556554233
## [4,] -0.009145393
## [5,] -0.009145393
## [6,] -0.009145393

# confirm the largest eigenvalue is 1
(eigen_d <- as.numeric(eigen(B)$values))

## [1]  1.00000000  0.57619235 -0.42500001 -0.42499999 -0.34991524 -0.08461044

(max(eigen_d))

## [1] 1

# vectors corresponding to eigenvalue 1
(eigen_d <- as.numeric(eigen(B)$vectors[,which.max(eigen(B)$values)]))

## [1] -0.4082483 -0.4082483 -0.4082483 -0.4082483 -0.4082483 -0.4082483

# normalize
(eigen_nor <- (1/sum(eigen_d))*eigen_d)

## [1] 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667 0.1666667

# sum of normalize eigen vectors
(sum(eigen_nor))

## [1] 1

Use the graph package in R and its page.rank method to compute the Page Rank of the graph as given in A. Note that you don’t need to apply decay. The package starts with a connected graph and applies decay internally. Verify that you do get the same PageRank vector as the two approaches above.

library(igraph)

## 
## Attaching package: 'igraph'

## The following object is masked from 'package:matrixcalc':
## 
##     %s%

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

A <-  matrix(c(r1, r2, r3, r4, r5, r6), 6, byrow = T)
graph_A <- graph_from_adjacency_matrix(A, weighted=TRUE)
plot(graph_A)

# verification after rounded to 5th decimal point
(pageRank <- as.matrix(page.rank(graph_A)$vector))

##            [,1]
## [1,] 0.05170475
## [2,] 0.07367926
## [3,] 0.05741241
## [4,] 0.34870369
## [5,] 0.19990381
## [6,] 0.26859608

round(convergence, 5) == round(pageRank, 5)

##      [,1]
## [1,] TRUE
## [2,] TRUE
## [3,] TRUE
## [4,] TRUE
## [5,] TRUE
## [6,] TRUE

Reference

http://www.math.cornell.edu/~mec/Winter2009/RalucaRemus/Lecture3/lecture3.html

DATA 605 Assignment 10

Ahsanul Choudhury

April 9, 2017

Playing with PageRank

Reference