# Data 605 - Assignment 4
# Load libraries we will need
rm(list = ls())
library(jpeg)
library(OpenImageR)
library(Matrix)# Some initializations
# Set your data folder here, and the index of the shoe that will be used to
# compare the different variance settings.
path2files <- "HW_Assignment4/data"
shoe2test <- 4
#setwd("Code")

# Read and Plot the first image (.jpg)
# The first image supplies the dimensions that the rest of the script uses to
# size the data matrix and to reshape flat vectors back into images.
img <- jpeg::readJPEG(file.path(path2files, "RC_2500x1200_2014_us_53446.jpg"))
dim(img) ## [1] 1200 2500 3
# We will save the row, col, channels dimension
row_dim <- dim(img)[1]
col_dim <- dim(img)[2]
channel_dim <- dim(img)[3]
# plot the image
imageShow(img)

# Read in all image files
# Read to a list all filenames in folder.
# `pattern` is a regular expression, so the dot is escaped and the match is
# anchored at the end: only names that really end in ".jpg" are picked up.
filenames <- list.files(path = path2files, pattern = "\\.jpg$")
# Initialize the matrix where all files will be stored: one row per file, one
# column per value (rows x cols x channels) of the first image.
all_images_data <- matrix(0, length(filenames), prod(dim(img)))
# Show the dimensions of the matrix
dim(all_images_data) ## [1] 17 9000000
# Matrix is 17 x 9MM. That equals 17 shoes, each having dimensions 1200 x 2500 x 3

# Function to Plot 1d Image Files
# This function didn't work in Markdown, it didn't display the image in the
# knitted file, so it was not used for the submission. It does work correctly
# in RStudio.
#
# Reshape a flattened image vector back into a 3-d array and display it.
#
# img1d: numeric vector holding one image, flattened in array (column-major)
#        order.
# dims:  target array dimensions. Defaults to the script-level
#        row_dim / col_dim / channel_dim taken from the first image.
#
# Returns whatever imageShow() returns; called for its plotting side effect.
plot_shoe <- function(img1d, dims = c(row_dim, col_dim, channel_dim)) {
  # array() silently recycles or truncates when the length does not match, which
  # would display a scrambled image, so fail loudly instead.
  stopifnot(length(img1d) == prod(dims))
  img3d <- array(img1d, dims)
  imageShow(img3d)
}

# Read all images into a single matrix
# Load every listed file into its own row of all_images_data.
for (i in seq_along(filenames)) {
  filename <- filenames[i]
  print(paste("loading file: ", filename))
  # Read from the configured folder (the same one list.files() scanned) rather
  # than a hard-coded copy of the path.
  img <- jpeg::readJPEG(file.path(path2files, filename))
  # Row assignment recycles silently when lengths divide evenly, so check that
  # the image has exactly as many values as the matrix has columns.
  if (length(img) != ncol(all_images_data)) {
    stop("Unexpected image size in file: ", filename, call. = FALSE)
  }
  # Images are stored as a single vector red-green-blue. Arrays are laid out
  # with the last index (channel) varying slowest, so flattening the array
  # yields all red values, then all green, then all blue.
  all_images_data[i, ] <- as.vector(img)
}
## [1] "loading file: RC_2500x1200_2014_us_53446.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53455.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53469.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53626.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53632.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53649.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53655.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53663.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53697.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54018.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54067.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54106.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54130.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54148.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54157.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54165.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54172.jpg"
# Let's transpose so that each column is an image
all_images_data <- t(all_images_data)
dim(all_images_data) ## [1] 9000000 17
# Test plotting chosen shoe image: reshape its column back to
# rows x cols x channels and display it.
#plot_shoe(all_images_data[,shoe2test])
imageShow(array(all_images_data[, shoe2test], c(row_dim, col_dim, channel_dim)))

# Process matrix with SVD (Singular Value Decomposition)
# Apply SVD to get U, Sigma and V matrices
# Scale data before applying SVD. scale() centers each column (one image per
# column) and divides it by its standard deviation.
scaled_data <- scale(all_images_data)
# A column with zero standard deviation turns into NaN (0 / 0); reset to 0.
scaled_data[is.nan(scaled_data)] <- 0
svd_decomp <- svd(scaled_data)

# Compute variance explained by each component
# When I checked the variance of the Sigmas from SVD, the result I found said
# to square the sigmas: the share of variance carried by component i is
# d_i^2 / sum(d^2). The same squaring applies to PCA: on centered data the
# variance of principal component i is d_i^2 / (n - 1), so the proportions are
# identical.
var_explained <- svd_decomp$d^2 / sum(svd_decomp$d^2)
plot(var_explained, type = "b", xlab = "Column",
     ylab = "Prop. of variance explained", pch = 19)

# Calculate number of vectors needed to explain X% of variance
# Smallest number of leading singular vectors whose cumulative share of
# variance (d^2 / sum(d^2)) reaches var_pct.
#
# d:       vector of singular values, as returned in svd()$d.
# var_pct: target share of variance, between 0 and 1.
vectors_for_variance <- function(d, var_pct) {
  cum_share <- cumsum(d^2 / sum(d^2))
  k <- which(cum_share >= var_pct)[1]
  # Rounding can leave the last cumulative share a hair below 1, in which case
  # no element qualifies and which(...)[1] is NA; fall back to all vectors.
  if (is.na(k)) {
    k <- length(d)
  }
  k
}

# Rebuild the data matrix from the first k singular triplets of an svd() result.
#
# decomp: list with elements u, d and v, as returned by svd().
# k:      number of leading components to keep.
reconstruct_rank_k <- function(decomp, k) {
  idx <- seq_len(k)
  # drop = FALSE keeps u and v as matrices when k == 1, and nrow = k stops
  # diag() from treating a single singular value as the size of an identity
  # matrix.
  decomp$u[, idx, drop = FALSE] %*%
    diag(decomp$d[idx], nrow = k) %*%
    t(decomp$v[, idx, drop = FALSE])
}

# We will start with 80%
var_pct <- 0.8
vectors <- vectors_for_variance(svd_decomp$d, var_pct)
print(paste0("Vectors to use: ", vectors)) ## [1] "Vectors to use: 3"

# Plot ONE shoe image using only a few vectors (80% variance)
# Please note that although we have ALL shoes in a single matrix and the SVD
# was applied to all shoes, I decided just to plot one chosen shoe at a time.
# This was just my personal preference, vs what the Professor did which was to
# show all shoes at the same time.
# Reconstruct all shoes using only a subset of columns which explain X% of
# variability.
newimage <- reconstruct_rank_k(svd_decomp, vectors)
# Select ONE shoe of the reconstructed matrix and plot it.
imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))

# Compare different levels of variance: 70%, 90%, 95%, 98% and 99%.
# Output recorded in the knitted report for each level:
#   70% -> ## [1] "Vectors to use: 2"
#   90% -> ## [1] "Vectors to use: 7"
#   95% -> ## [1] "Vectors to use: 11"
#   98% -> ## [1] "Vectors to use: 14"
#   99% -> ## [1] "Vectors to use: 16"
for (var_pct in c(0.7, 0.9, 0.95, 0.98, 0.99)) {
  vectors <- vectors_for_variance(svd_decomp$d, var_pct)
  print(paste0("Vectors to use: ", vectors))
  newimage <- reconstruct_rank_k(svd_decomp, vectors)
  imageShow(array(newimage[, shoe2test], c(row_dim, col_dim, channel_dim)))
}