Loading Libraries Necessary for Assignment

# Loading Libraries
library(doParallel)
library(foreach)
library(EBImage)
library(jpeg)
library(kableExtra)
library(OpenImageR)
library(imager)
setwd("C:/Users/biguz/Desktop/CUNY Data Science/Spring2021/DATA 605/Assignments/Assignment 4/")

Creating the plot_jpeg Function

# plot_jpeg Function
plot_jpeg = function(path, add=FALSE)
{ jpg = readJPEG(path, native=T) # read the file
  res = dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) # initialize an empty plot area if add==FALSE
    plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]),asp=1,type='n',xaxs='i',yaxs='i',xaxt='n',yaxt='n',xlab='',ylab='',bty='n')
  rasterImage(jpg,1,1,res[1],res[2])
}

Reading JPEG Files into an Array

# Listing All .jpg Files in Directory
num=17
files=list.files("./images",pattern="\\.jpg")[1:num]

# Setting Dimensions
height=1200; width=2500;scale=20

# Creating Empty Array
im=array(rep(0,num*height/scale*width/scale*3), dim=c(num, height/scale, width/scale,3))

# Populating Array with Images
for (i in 1:num){
  temp=EBImage::resize(readJPEG(paste0("./images/", files[i])),height/scale, width/scale)
  im[i,,,]=array(temp,dim=c(1, height/scale, width/scale,3))}

Converting array to matrix

# Initiate Matrix
flat = matrix(0, 17, prod(dim(im)))

# Populating Matrix
for (i in 1:num) {
  newim <- readJPEG(paste0("./images/", files[i]))
  r=as.vector(im[i,,,1]); g=as.vector(im[i,,,2]);b=as.vector(im[i,,,3])
  flat[i,] <- t(c(r, g, b))
}

# Creating Shoe Dataframe
shoes=as.data.frame(t(flat))

Plot Shoe JPGs

#Plot the shoe images
par(mfrow=c(3,3))
par(mai=c(.3,.3,.3,.3))
for (i in 1:num){
  plot_jpeg(writeJPEG(im[i,,,]))
}

Eigencomponents from the correlation matrix

Scaling allows us to use either the correlation matrix as it would be the same to the covariance matrix.

# Scale the shoe dataframe
scaled=scale(shoes, center = TRUE, scale = TRUE)
mean.shoe=attr(scaled, "scaled:center") 
std.shoe=attr(scaled, "scaled:scale")

Create the covariance(corelation) matrix for which to find the eigenvalues and eigenvectors for

# Correlation
Sigma_ = cor(scaled)

# Eigen
myeigen=eigen(Sigma_)

Show how much variance each eigen shoe accounts for

I’m not sure how this works. Is this accounting for the variance in each shoe? or the variance in each pixel of each shoe?

cumsum(myeigen$values) / sum(myeigen$values)

##  [1] 0.6928202 0.7940449 0.8451072 0.8723847 0.8913841 0.9076337 0.9216282
##  [8] 0.9336889 0.9433871 0.9524454 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000

Create the values to plot the eigenshoes

I’m not sure what this is in terms of matrix math, but it seems very close to the SVD equation \(A = U\Sigma V^T\) where A = eigenshoes, U = scaled, \(\Sigma=\)scaling, and \(V^{T}=\)meyeigen_vectors. Is this the correct interpretation for this equation?

scaling=diag(myeigen$values^(-1/2)) / (sqrt(nrow(scaled)-1))
eigenshoes=scaled%*%myeigen$vectors%*%scaling
imageShow(array(eigenshoes[,1], c(60,125,3))) #showing first eigenshoe

Creating the principal components using princomp()

newdata=im
dim(newdata)=c(num,height*width*3/scale^2)
mypca=princomp(t(as.matrix(newdata)), scores=TRUE, cor=TRUE)

Using the PCA results to plot all the eigenshoes

mypca2=t(mypca$scores)
dim(mypca2)=c(17,height/scale,width/scale,3)
par(mfrow=c(4,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:num){#plot the 17 Eigenshoes
  plot_jpeg(writeJPEG(mypca2[i,,,], bg="black"))  #complete without reduction
}

a = round(mypca$sdev[1:num]^2/sum(mypca$sdev^2),3)
cumsum(a)

##  Comp.1  Comp.2  Comp.3  Comp.4  Comp.5  Comp.6  Comp.7  Comp.8  Comp.9 Comp.10 
##   0.693   0.794   0.845   0.872   0.891   0.907   0.921   0.933   0.943   0.952 
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17 
##   0.960   0.968   0.976   0.983   0.989   0.995   1.000

Additional Questions

We see that we only need 3 eigenshoes to account for 84.5% of the variability in the pixels(or shoes, still not sure). We can see these below:

I have a question about why using imageShow() vs the plot_jpeg() function created at the beginning of this script produce different eigenshoe images. Above we have the eigenshoes using the PCA components without reduction, while below we have the first three eigenshoes taken from the eigenshoes matrix.

Eigenshoe 1

Eigenshoe 2

Eigenshoe 3

Are the eigen shoes different from the PCA shoes because one is using reduction and one is not using reduction?

Using built in R svd function to reconstruct each shoe using only the first 3 components.

Showing shoe 1 as an example because I couldn’t figure out how to plot all the reconstructed shoes. Using plot_jpeg function I get a much different shoe, and I couldn’t convert the dimensions.

# Run svd on the scaled matrix
shoe_svd <- svd(scaled)

# increasing the number of components to a max of 17 from which to rebuild the originals
# will make the reconstructed show look more like the original
num_components <- 3

# Create the reconstruction matrix using the first 3 components
shoe_reconstruction <- shoe_svd$u[,1:num_components]%*% diag(shoe_svd$d[1:num_components]) %*% t(shoe_svd$v[,1:num_components])

Original

# changing the row of im to any number between 2:17 will change the shoe
plot_jpeg(writeJPEG(im[1,,,]))

Reconstructed

# changing the column of shoe_reconstruction to any number between 2:17 will change the shoe
imageShow(array(shoe_reconstruction[,1],c(60,125,3)))

Assignment 4

Stefano Biguzzi

2/20/2021