Data 605 - Assignment 4

Load libraries we will need

rm(list = ls())
library(jpeg)
library(OpenImageR)
library(Matrix)
# Some initializations
# Set your data folder here, and the index of the shoe we will use to compare the different variance settings
# path2files is the folder searched for the image files
path2files <- "HW_Assignment4/data"
# shoe2test is used as a column index into the image matrix when plotting a single shoe
shoe2test <- 4
#setwd("Code")

Read and Plot the first image (.jpg)

# Load the first shoe image; its dimensions are reused for every other image
img <- jpeg::readJPEG(
  paste0(path2files, "/RC_2500x1200_2014_us_53446.jpg")
)

dim(img)
## [1] 1200 2500    3
# Keep the row, column and channel counts so flat vectors can be reshaped later
row_dim <- nrow(img)
col_dim <- ncol(img)
channel_dim <- dim(img)[3]

# Display the image
imageShow(img)

Read in all image files

Read the names of all image files in the folder into a vector

# `pattern` is a regular expression, not a glob: escape the dot and anchor the
# match at the end of the name so that only files ending in ".jpg" are listed
# (the bare ".jpg" would also match e.g. "notjpg.txt" or "shoe.jpg.bak").
filenames <- list.files(path = path2files, pattern = "\\.jpg$")

# Preallocate the storage: one row per image file, one column per pixel value
all_images_data <- matrix(
  data = 0,
  nrow = length(filenames),
  ncol = prod(dim(img))
)

# Inspect the shape of the preallocated matrix
dim(all_images_data)
## [1]      17 9000000
# The matrix is 17 x 9MM, i.e. 17 shoes, each with 1200 x 2500 x 3 values

Function to Plot 1d Image Files

# This function did not display the image in the knitted Markdown file, so it is not used for the submission. It does work correctly in RStudio.

# Reshape a flattened image vector back into a 3-d array and display it.
#
# img1d: vector holding one image, flattened in the same element order that
#        array() uses to fill a (rows, cols, channels) array.
# dims:  c(rows, cols, channels) of the image. Defaults to the script-level
#        row_dim, col_dim and channel_dim, so existing one-argument calls
#        behave as before.
#
# Returns whatever imageShow() returns.
plot_shoe <- function(img1d, dims = c(row_dim, col_dim, channel_dim)) {
  # array() silently recycles or truncates a vector of the wrong length,
  # which would draw a garbled image instead of failing.
  stopifnot(length(img1d) == prod(dims))
  img3d <- array(img1d, dim = dims)
  imageShow(img3d)
}

Read all images into a single matrix

# Load every image file and store it as one row of all_images_data.
# seq_along() keeps the row index tied to the position of the file name,
# so no separately maintained counter is needed.
for (counter in seq_along(filenames)) {
  filename <- filenames[counter]
  print(paste("loading file: ", filename))

  # Build the path from path2files (the folder the names were listed from)
  # rather than repeating the folder as a literal, so that changing the data
  # folder in one place is enough.
  img <- jpeg::readJPEG(file.path(path2files, filename))

  # Each image is stored as a single vector: red, then green, then blue
  pixels <- c(img[, , 1], img[, , 2], img[, , 3])

  # A shorter vector whose length divides the row length would be recycled
  # silently by the assignment below; fail loudly on a size mismatch instead.
  stopifnot(length(pixels) == ncol(all_images_data))

  all_images_data[counter, ] <- pixels
}
## [1] "loading file:  RC_2500x1200_2014_us_53446.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53455.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53469.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53626.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53632.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53649.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53655.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53663.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53697.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54018.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54067.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54106.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54130.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54148.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54157.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54165.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54172.jpg"
# Transpose so that each column holds one image
all_images_data <- t(all_images_data)
dim(all_images_data)
## [1] 9000000      17
# Sanity check: display the chosen shoe straight from the matrix
#plot_shoe(all_images_data[,shoe2test])
imageShow(
  array(
    all_images_data[, shoe2test],
    dim = c(row_dim, col_dim, channel_dim)
  )
)

Process matrix with SVD (Singular Value Decomposition)

Apply SVD to get U,Sigma and V matrices

# Center and scale every column (one column per image) before the decomposition
scaled_data <- scale(all_images_data, center = TRUE, scale = TRUE)
# Any NaN produced by the scaling step is replaced with 0 so that svd() gets
# finite input
scaled_data[is.nan(scaled_data)] <- 0
svd_decomp <- svd(x = scaled_data)

Compute variance explained by each component

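When I looked up how to get the variance explained from the sigmas (singular values) of the SVD, the sources I found said to square them: the variance captured by each component is proportional to the square of its singular value. I was not sure whether this squaring also applies to PCA; it does, because for centered data the eigenvalues of the covariance matrix equal sigma^2 / (n - 1), so the proportions of variance explained are the same.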

# Share of the total squared singular values carried by each component
plot(
  prop.table(svd_decomp$d^2),
  type = "b",
  xlab = "Column",
  ylab = "Prop. of variance explained",
  pch = 19
)

Calculate the number of vectors needed to explain X% of the variance

# Start with a target of 80% of the variance
var_pct <- 0.8
# Index of the first component at which the cumulative share reaches the
# target (NA if it is never reached)
vectors <- match(TRUE, cumsum(prop.table(svd_decomp$d^2)) >= var_pct)
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 3"

Plot ONE shoe image using only a few vectors (80% variance)

Please note that although ALL shoes are stored in a single matrix and the SVD was applied to all of them, I decided to plot just one chosen shoe at a time. This was simply my personal preference; the Professor instead showed all shoes at the same time.

# Reconstruct all shoes using only the leading columns that explain X% of the variability.
#
# drop = FALSE and diag(..., nrow = vectors) keep the product well-formed when
# only one vector is selected: u[, 1] would otherwise collapse to a plain
# vector, and diag() of a length-one vector builds an identity matrix rather
# than a 1 x 1 matrix holding the singular value.
newimage <- svd_decomp$u[, seq_len(vectors), drop = FALSE] %*%
  diag(svd_decomp$d[seq_len(vectors)], nrow = vectors) %*%
  t(svd_decomp$v[, seq_len(vectors), drop = FALSE])
# Select ONE shoe of the reconstructed matrix and plot it.
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))

Compare different levels of variance

70%

# Number of leading vectors needed to reach 70% of the variance
var_pct <- 0.7
vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 2"
# Rank-`vectors` reconstruction. drop = FALSE and diag(..., nrow = vectors)
# keep the product well-formed when a single vector is selected: u[, 1] would
# collapse to a plain vector, and diag() of a length-one vector builds an
# identity matrix rather than a 1 x 1 matrix.
newimage <- svd_decomp$u[, seq_len(vectors), drop = FALSE] %*%
  diag(svd_decomp$d[seq_len(vectors)], nrow = vectors) %*%
  t(svd_decomp$v[, seq_len(vectors), drop = FALSE])
# Plot the chosen shoe from the reconstructed matrix
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))

90%

# Number of leading vectors needed to reach 90% of the variance
var_pct <- 0.9
vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 7"
# Rank-`vectors` reconstruction. drop = FALSE and diag(..., nrow = vectors)
# keep the product well-formed when a single vector is selected: u[, 1] would
# collapse to a plain vector, and diag() of a length-one vector builds an
# identity matrix rather than a 1 x 1 matrix.
newimage <- svd_decomp$u[, seq_len(vectors), drop = FALSE] %*%
  diag(svd_decomp$d[seq_len(vectors)], nrow = vectors) %*%
  t(svd_decomp$v[, seq_len(vectors), drop = FALSE])
# Plot the chosen shoe from the reconstructed matrix
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))

95%

# Number of leading vectors needed to reach 95% of the variance
var_pct <- 0.95
vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 11"
# Rank-`vectors` reconstruction. drop = FALSE and diag(..., nrow = vectors)
# keep the product well-formed when a single vector is selected: u[, 1] would
# collapse to a plain vector, and diag() of a length-one vector builds an
# identity matrix rather than a 1 x 1 matrix.
newimage <- svd_decomp$u[, seq_len(vectors), drop = FALSE] %*%
  diag(svd_decomp$d[seq_len(vectors)], nrow = vectors) %*%
  t(svd_decomp$v[, seq_len(vectors), drop = FALSE])
# Plot the chosen shoe from the reconstructed matrix
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))

98%

# Number of leading vectors needed to reach 98% of the variance
var_pct <- 0.98
vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 14"
# Rank-`vectors` reconstruction. drop = FALSE and diag(..., nrow = vectors)
# keep the product well-formed when a single vector is selected: u[, 1] would
# collapse to a plain vector, and diag() of a length-one vector builds an
# identity matrix rather than a 1 x 1 matrix.
newimage <- svd_decomp$u[, seq_len(vectors), drop = FALSE] %*%
  diag(svd_decomp$d[seq_len(vectors)], nrow = vectors) %*%
  t(svd_decomp$v[, seq_len(vectors), drop = FALSE])
# Plot the chosen shoe from the reconstructed matrix
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))

99%

# Number of leading vectors needed to reach 99% of the variance
var_pct <- 0.99
vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 16"
# Rank-`vectors` reconstruction. drop = FALSE and diag(..., nrow = vectors)
# keep the product well-formed when a single vector is selected: u[, 1] would
# collapse to a plain vector, and diag() of a length-one vector builds an
# identity matrix rather than a 1 x 1 matrix.
newimage <- svd_decomp$u[, seq_len(vectors), drop = FALSE] %*%
  diag(svd_decomp$d[seq_len(vectors)], nrow = vectors) %*%
  t(svd_decomp$v[, seq_len(vectors), drop = FALSE])
# Plot the chosen shoe from the reconstructed matrix
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))