# Loading Libraries
library(doParallel)
library(foreach)
library(EBImage)
library(jpeg)
library(kableExtra)
library(OpenImageR)
library(imager)
setwd("C:/Users/biguz/Desktop/CUNY Data Science/Spring2021/DATA 605/Assignments/Assignment 4/")
# plot_jpeg Function
plot_jpeg = function(path, add=FALSE)
{ jpg = readJPEG(path, native=T) # read the file
res = dim(jpg)[2:1] # get the resolution, [x, y]
if (!add) # initialize an empty plot area if add==FALSE
plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]),asp=1,type='n',xaxs='i',yaxs='i',xaxt='n',yaxt='n',xlab='',ylab='',bty='n')
rasterImage(jpg,1,1,res[1],res[2])
}
# Listing All .jpg Files in Directory
num=17
files=list.files("./images",pattern="\\.jpg")[1:num]
# Setting Dimensions
height=1200; width=2500;scale=20
# Creating Empty Array
im=array(rep(0,num*height/scale*width/scale*3), dim=c(num, height/scale, width/scale,3))
# Populating Array with Images
for (i in 1:num){
temp=EBImage::resize(readJPEG(paste0("./images/", files[i])),height/scale, width/scale)
im[i,,,]=array(temp,dim=c(1, height/scale, width/scale,3))}
# Initiate Matrix
flat = matrix(0, 17, prod(dim(im)))
# Populating Matrix
for (i in 1:num) {
newim <- readJPEG(paste0("./images/", files[i]))
r=as.vector(im[i,,,1]); g=as.vector(im[i,,,2]);b=as.vector(im[i,,,3])
flat[i,] <- t(c(r, g, b))
}
# Creating Shoe Dataframe
shoes=as.data.frame(t(flat))
#Plot the shoe images
par(mfrow=c(3,3))
par(mai=c(.3,.3,.3,.3))
for (i in 1:num){
plot_jpeg(writeJPEG(im[i,,,]))
}
Scaling allows us to use either the correlation matrix as it would be the same to the covariance matrix.
# Scale the shoe dataframe
scaled=scale(shoes, center = TRUE, scale = TRUE)
mean.shoe=attr(scaled, "scaled:center")
std.shoe=attr(scaled, "scaled:scale")
# Correlation
Sigma_ = cor(scaled)
# Eigen
myeigen=eigen(Sigma_)
I’m not sure how this works. Is this accounting for the variance in each shoe? or the variance in each pixel of each shoe?
cumsum(myeigen$values) / sum(myeigen$values)
## [1] 0.6928202 0.7940449 0.8451072 0.8723847 0.8913841 0.9076337 0.9216282
## [8] 0.9336889 0.9433871 0.9524454 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
I’m not sure what this is in terms of matrix math, but it seems very close to the SVD equation \(A = U\Sigma V^T\) where A = eigenshoes, U = scaled, \(\Sigma=\)scaling, and \(V^{T}=\)meyeigen_vectors. Is this the correct interpretation for this equation?
scaling=diag(myeigen$values^(-1/2)) / (sqrt(nrow(scaled)-1))
eigenshoes=scaled%*%myeigen$vectors%*%scaling
imageShow(array(eigenshoes[,1], c(60,125,3))) #showing first eigenshoe
newdata=im
dim(newdata)=c(num,height*width*3/scale^2)
mypca=princomp(t(as.matrix(newdata)), scores=TRUE, cor=TRUE)
mypca2=t(mypca$scores)
dim(mypca2)=c(17,height/scale,width/scale,3)
par(mfrow=c(4,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:num){#plot the 17 Eigenshoes
plot_jpeg(writeJPEG(mypca2[i,,,], bg="black")) #complete without reduction
}
a = round(mypca$sdev[1:num]^2/sum(mypca$sdev^2),3)
cumsum(a)
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 0.693 0.794 0.845 0.872 0.891 0.907 0.921 0.933 0.943 0.952
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## 0.960 0.968 0.976 0.983 0.989 0.995 1.000
We see that we only need 3 eigenshoes to account for 84.5% of the variability in the pixels(or shoes, still not sure). We can see these below:
I have a question about why using imageShow() vs the plot_jpeg() function created at the beginning of this script produce different eigenshoe images. Above we have the eigenshoes using the PCA components without reduction, while below we have the first three eigenshoes taken from the eigenshoes matrix.
Are the eigen shoes different from the PCA shoes because one is using reduction and one is not using reduction?
Showing shoe 1 as an example because I couldn’t figure out how to plot all the reconstructed shoes. Using plot_jpeg function I get a much different shoe, and I couldn’t convert the dimensions.
# Run svd on the scaled matrix
shoe_svd <- svd(scaled)
# increasing the number of components to a max of 17 from which to rebuild the originals
# will make the reconstructed show look more like the original
num_components <- 3
# Create the reconstruction matrix using the first 3 components
shoe_reconstruction <- shoe_svd$u[,1:num_components]%*% diag(shoe_svd$d[1:num_components]) %*% t(shoe_svd$v[,1:num_components])
# changing the row of im to any number between 2:17 will change the shoe
plot_jpeg(writeJPEG(im[1,,,]))
# changing the column of shoe_reconstruction to any number between 2:17 will change the shoe
imageShow(array(shoe_reconstruction[,1],c(60,125,3)))