Firstly, we will write a function that generates random nonnegative matrices \(W\) and \(H\) such that for \(V \in \mathbb{R}^{m \times n}\) then \(W \in \mathbb{R}^{m \times r}\) and \(H \in \mathbb{R}^{r \times n}\) with \(r\) is rank of matrix \(V\).
# Load some packages
rm(list = ls())
library(tidyverse)
library(ggplot2)
library(Matrix)
library(pixmap)
# The number of iterations equal maxiters - 1
maxiters <- 101random_initialization <- function(V)
{
# Find rank V
r <- rankMatrix(V)
# Get number of row of V
m <- dim(V)[1]
# Get number of column of V
n <- dim(V)[2]
# Create matrix W (m x r) with entry in [1,2]
W <- matrix(runif(m*r,1,2), nrow = m, ncol = r)
# Create matrix V (r x n) with entry in [1,2]
H <- matrix(runif(n*r,1,2), nrow = r, ncol = n)
return (list(W = W,H = H))
}The first NMF algorithm was derived by Lee and Seung named MU.
multiplication_update <- function(V,W,H, maxiters, error = 0.1)
{
eps <- 1e-05
MUlog <- rep(NA, maxiters)
MUlog[[1]] <- norm(V - W %*% H,"F")
i <- 2
while (i <= maxiters && MUlog[[i-1]] > error)
{
# update H
H <- H * (t(W) %*% V) / ((t(W) %*% W %*% H)+eps)
# update W
W <- W * (V %*% t(H)) / ((W %*% H %*% t(H))+eps)
# Trace F norm
MUlog[[i]] <- norm(V - W %*% H,"F")
i <- i + 1
}
return (list(W = W,H = H,MUlog = MUlog))
}We now introduce an alternating gradient (AG) algorithm based on Gradient Method
alternating_gradient <- function(V,W,H, maxiters = 3, error = 0.1)
{
AGlog <- rep(NA, maxiters)
AGlog[[1]] <- norm(V - W %*% H,"F")
i <- 2
while (i <= maxiters && AGlog[[i-1]] > error)
{
# Compute gradient when W fixed
derivative_h <- (t(W) %*% W %*% H) - (t(W) %*% V)
a <- norm(derivative_h,"F")^2
b <- norm(W %*% (t(W) %*% W %*% H - t(W) %*% V),"F")^2
infimum <- H[which(derivative_h>0)]/derivative_h[which(derivative_h>0)]
stepsize <- min(a/b,infimum)
# update H
H <- H - stepsize*(derivative_h)
# Compute gradient when H fixed
derivative_w <- (W %*% H %*% t(H)) - (V %*% t(H))
a <- norm(derivative_w,"F")^2
b <- norm((W %*% H %*% t(H) - V %*% t(H)) %*% H,"F")^2
infimum <- W[which(derivative_w>0)]/derivative_w[which(derivative_w>0)]
stepsize <- min(a/b,infimum)
# update W
W <- W - stepsize*(derivative_w)
# Trace F norm
AGlog[[i]] <- norm(V - W %*% H,"F")
i <- i + 1
}
return (list(W = W, H = H, AGlog = AGlog))
}Because numerical experiments show that the AG algorithm is not better than the MU algorithm, we propose an improvement of the AG algorithm by using alternating projected gradient approach which are denoted by the APG algorithm in short
alternating_projected_gradient <- function(V,W,H, maxiters, error = 0.1)
{
APGlog <- rep(NA, maxiters)
APGlog[[1]] <- norm(V - W %*% H,"F")
i <- 2
while (i <= maxiters && APGlog[[i-1]] > error)
{
# Compute gradient when W fixed
derivative_h <- (t(W) %*% W %*% H) - (t(W) %*% V)
a <- norm(derivative_h,"F")^2
b <- norm(W %*% (t(W) %*% W %*% H - t(W) %*% V),"F")^2
stepsize <- a/b
# update H
H <- H - stepsize*(derivative_h)
# Set all negative value in H to 0
H <- pmax(H,0)
# Compute gradient when H fixed
derivative_w <- (W %*% H %*% t(H)) - (V %*% t(H))
a <- norm(derivative_w,"F")^2
b <- norm((W %*% H %*% t(H) - V %*% t(H)) %*% H,"F")^2
stepsize <- a/b
# update W
W <- W - stepsize*(derivative_w)
# Set all negative value in W to 0
W <- pmax(W,0)
# Trace F norm
APGlog[[i]] <- norm(V - W %*% H,"F")
i <- i + 1
}
return (list(W = W, H = H, APGlog = APGlog))
}Let us download the ATT Faces dataset. The dataset consists of 10 black and white photos of each member of a group 40 individuals. 400 images in total.
# "https://www.kaggle.com/datasets/kasikrit/att-database-of-faces/download?datasetVersionNumber=2"
# set working directory have the dataset
setwd("D:\\faces")
# search folder have the dataset
dirs <- list.dirs("D:\\faces")
# search files .pgm
files <- list.files(pattern = ".pgm", recursive = TRUE)
V <- matrix(nrow = 92*112, ncol = length(files))
for (i in 1:length(files)){
v <- read.pnm(file = files[i], cellres=1)
V[,i] <- floor(as.vector(v@grey)*255)
}
plot_face <- function(arr10304, col=gray(0:255/255)){
m <- matrix(arr10304, ncol=92, nrow = 112)
image(t(m[112:1,]), asp=112/92, axes = FALSE, col=col)
}
# Plot some faces
par(mfrow=c(10,20), mar=c(0, 0.2, 0, 0), oma=c(0,0,0,0))
for(i in sample(400,200)){
plot_face(V[,i])
}Now we NMF the matrix V, which is every column is a face image
# create matrix W and H
cre_matrix <- random_initialization(V)
res1 <- multiplication_update(V,cre_matrix$W,cre_matrix$H,maxiters)
#res2 <- alternating_gradient(V,cre_matrix$W,cre_matrix$H,maxiters)
res3 <- alternating_projected_gradient(V,cre_matrix$W,cre_matrix$H,maxiters)
df_res <- data.frame(num =1:(length(res1$MUlog)-1), value1 = res1$MUlog[2:maxiters], value2 = res3$APGlog[2:maxiters])
#Plot line graph
df_res %>%
ggplot(aes(x = num)) +
geom_line(aes(y = value1), color = "darkred") +
geom_line(aes(y = value2), color= "steelblue") +
labs(x = "Iterations", y = "||V-WH||",
title = "Compare the convergence rate of the algorithms with ATT faces dataset",
caption = "Source Data: www.kaggle.com") +
annotate("text", x=maxiters+0.6, y=res1$MUlog[maxiters], label= "MU") +
annotate("text", x=maxiters+0.6, y=res3$APGlog[maxiters], label= "APG")The values of cost function for MU and checking nonincreasing
print(res1$MUlog)## [1] 1602251.58 74945.71 74779.13 74771.46 74763.36 74754.63
## [7] 74745.04 74734.33 74722.17 74708.17 74691.83 74672.52
## [13] 74649.48 74621.71 74588.00 74546.80 74496.14 74433.60
## [19] 74356.13 74259.98 74140.56 73992.35 73808.86 73582.70
## [25] 73305.86 72970.28 72568.83 72096.80 71553.79 70945.58
## [31] 70285.30 69592.67 68890.94 68201.72 67539.79 66910.21
## [37] 66308.56 65723.74 65141.44 64547.22 63928.72 63277.56
## [43] 62591.58 61877.42 61151.18 60433.58 59741.02 59080.29
## [49] 58450.82 57849.49 57273.47 56720.83 56190.24 55680.56
## [55] 55190.61 54719.10 54264.60 53825.59 53400.53 52987.94
## [61] 52586.40 52194.65 51811.57 51436.22 51067.81 50705.70
## [67] 50349.38 49998.46 49652.64 49311.70 48975.49 48643.91
## [73] 48316.92 47994.51 47676.68 47363.50 47055.00 46751.26
## [79] 46452.31 46158.22 45869.00 45584.65 45305.16 45030.48
## [85] 44760.54 44495.25 44234.50 43978.15 43726.09 43478.14
## [91] 43234.18 42994.03 42757.56 42524.62 42295.07 42068.78
## [97] 41845.62 41625.49 41408.28 41193.88 40982.22
cat("Is nonincreasing?:", !is.unsorted(rev(res1$MUlog)))## Is nonincreasing?: TRUE
The values of cost function for APG and checking nonincreasing
print(res3$APGlog)## [1] 1602251.58 75873.05 74236.22 73379.21 72487.46 71401.22
## [7] 70016.61 67633.52 65026.66 63065.69 60901.15 59400.99
## [13] 57845.89 56624.44 55471.00 54241.67 53303.92 52206.64
## [19] 51349.92 50459.52 49711.63 48990.77 48370.41 47759.05
## [25] 47222.18 46690.85 46218.31 45745.35 45318.66 44890.06
## [31] 44497.70 44103.83 43739.11 43373.64 43032.24 42690.66
## [37] 42369.23 42047.80 41743.64 41439.73 41151.09 40863.13
## [43] 40588.76 40315.29 40054.00 39793.73 39544.49 39296.28
## [49] 39058.12 38820.98 38593.16 38366.63 38148.62 37932.10
## [55] 37723.35 37516.16 37316.00 37117.61 36925.39 36735.02
## [61] 36550.27 36367.29 36189.54 36013.66 35842.56 35673.46
## [67] 35508.39 35345.55 35186.28 35029.52 34875.71 34724.60
## [73] 34576.06 34430.17 34286.55 34145.80 34006.99 33871.06
## [79] 33736.73 33605.27 33475.25 33348.04 33222.13 33099.00
## [85] 32977.01 32857.84 32739.69 32624.35 32509.82 32398.05
## [91] 32286.90 32178.46 32070.50 31965.20 31860.21 31757.94
## [97] 31655.88 31556.55 31457.27 31360.75 31264.15
cat("Is nonincreasing?:", !is.unsorted(rev(res3$APGlog)))## Is nonincreasing?: TRUE