Assignment 4

With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

Load Photos

# List the JPEG files once and derive the count from that same listing, so the
# names and the count can never disagree and an empty directory gives a
# zero-length vector (indexing with `1:0` would have produced an NA entry).
# The pattern is anchored with `$` so only names ending in ".jpg" match.
shoe_jpg <- list.files("./jpg", pattern = "\\.jpg$")

numfiles <- length(shoe_jpg) # total number of image files

Plotting function for the shoe images

# Draw a JPEG image on the current graphics device.
#
# path: value handed to readJPEG() as the image source. Elsewhere in this file
#       the callers pass the result of writeJPEG() rather than a file name.
# add:  when FALSE (the default) a new, empty, axis-free plot region sized to
#       the image is opened first; when TRUE the image is drawn onto the
#       existing plot.
#
# Returns whatever rasterImage() returns; the function is called for its
# plotting side effect.
image_plot <- function(path, add = FALSE) {
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned.
  jpg <- readJPEG(path, native = TRUE) # read the image
  resol <- dim(jpg)[2:1] # reversed dims, i.e. the resolution as [x, y]
  if (!add) {
    # initialize an empty plot area whose limits match the image resolution
    plot(1, 1, xlim = c(1, resol[1]), ylim = c(1, resol[2]), asp = 1,
         type = 'n', xaxs = 'i', yaxs = 'i', xaxt = 'n', yaxt = 'n',
         xlab = '', ylab = '', bty = 'n')
  }
  rasterImage(jpg, 1, 1, resol[1], resol[2])
}

Load the images into the img_array

height <- 1200
width <- 2500
scale <- 20

# Size of each down-sampled image: c(height / scale, width / scale).
img_dim <- c(height, width) / scale

# Pre-allocate a zero-filled array indexed as [image, row, column, channel].
img_array <- array(0, dim = c(numfiles, img_dim, 3))

# seq_len() makes the loop a no-op when there are no files; `1:numfiles`
# would iterate over c(1, 0) in that case.
for (i in seq_len(numfiles)) {
  # NOTE(review): resize() is given height/scale first and width/scale second,
  # and the result is stored as (height/scale) x (width/scale) x 3 -- confirm
  # this matches the argument order EBImage::resize expects for readJPEG output.
  temp <- EBImage::resize(
    readJPEG(paste0("./jpg/", shoe_jpg[i])),
    img_dim[1], img_dim[2]
  )
  img_array[i, , , ] <- array(temp, dim = c(1, img_dim, 3))
}

Vectorize the img_array

# Flatten every image into one row of length rows * cols * 3.
#
# The row length must be the per-image pixel count, prod(dim(img_array)[-1]).
# Using prod(dim(img_array)) also multiplies in the number of images, which
# makes each row numfiles times too long and silently recycles the pixel
# vector to fill it.
#
# Re-dimensioning the array directly gives, for image i, the red plane followed
# by the green and blue planes (each in column-major order) -- the same layout
# as c(as.vector(img_array[i,,,1]), as.vector(img_array[i,,,2]),
# as.vector(img_array[i,,,3])) -- without looping or re-reading the JPEG files.
vimageMtrx <- img_array
dim(vimageMtrx) <- c(numfiles, prod(dim(img_array)[-1]))

# One column per image, one row per pixel/channel value.
shoes <- as.data.frame(t(vimageMtrx))

Shoe Image Plot

# Lay the plots out on a 3 x 3 grid with 0.3 inch margins on every side.
par(mfrow = c(3, 3))
par(mai = c(.3, .3, .3, .3))

# Draw each down-sampled image. writeJPEG() is called without a target and
# its result is handed straight to image_plot(). seq_len() keeps the loop
# empty when numfiles is 0, where `1:numfiles` would run for i = 1 and i = 0.
for (i in seq_len(numfiles)) {
  image_plot(writeJPEG(img_array[i, , , ]))
}

# Standardise each column of `shoes` (centering and scaling are both the
# defaults of scale()), then keep the per-column centre and scale values that
# scale() records as attributes on its result.
imgscale <- scale(shoes)
scale_attrs <- attributes(imgscale)
mean.shoe <- scale_attrs[["scaled:center"]]
std.shoe <- scale_attrs[["scaled:scale"]]

Calculate the Covariance (Correlation) and get the Eigencomponents

# Correlation matrix between the columns of the standardised data.
myCor <- cor(imgscale)

# Eigen decomposition of the correlation matrix (eigenvalues and eigenvectors).
myeigen <- eigen(myCor)

# Cumulative share of the eigenvalue total contributed by the leading
# components; printed so the cut-off can be read off.
with(myeigen, cumsum(values) / sum(values))
##  [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
##  [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000

Eigenshoes

Approximately 80% of the variability (79.4%) is reached at position 2; the cumulative proportion first exceeds 80% at position 3 (84.5%).

# Build the eigenshoes from the first two eigencomponents.
# NOTE(review): per the cumulative proportions printed above, two components
# cover 0.794 and three cover 0.845, so a strict 80% threshold would need the
# first three -- confirm that two is the intended cut-off.

# Diagonal matrix of 1 / sqrt(eigenvalue), further divided by sqrt(n - 1)
# where n is the number of rows of the standardised data.
scaling <- diag(myeigen$values[1:2]^(-1/2)) / (sqrt(nrow(imgscale) - 1))

# Project the standardised data onto the two leading eigenvectors and rescale.
eigenshoes <- imgscale %*% myeigen$vectors[, 1:2] %*% scaling

# Show the second eigenshoe. The image dimensions are derived from
# height/width/scale rather than hard-coded as 60 x 125, so they stay in step
# with the values used when the images were resized.
imageShow(array(eigenshoes[, 2], c(height / scale, width / scale, 3)))

Generate the principal components using the princomp() function

# Reshape the image array into a matrix with one row per image and one column
# per pixel/channel value.
newdata <- array(
  img_array,
  dim = c(numfiles, height * width * 3 / scale^2)
)

# Principal components on the transposed matrix (images become the columns),
# using the correlation matrix and keeping the component scores.
mypca <- princomp(t(as.matrix(newdata)), cor = TRUE, scores = TRUE)

Generate all eigenshoes

# Transpose the princomp scores and reshape them into the same
# [component, row, column, channel] layout used for img_array.
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(numfiles, height / scale, width / scale, 3)

# 3 x 3 grid with near-zero margins so the images sit edge to edge.
par(mfrow = c(3, 3))
par(mai = c(.001, .001, .001, .001))

# NOTE(review): the scores are passed to writeJPEG() unchanged -- confirm how
# writeJPEG treats values outside the range it expects.
# seq_len() keeps the loop empty when numfiles is 0, where `1:numfiles` would
# run for i = 1 and i = 0.
for (i in seq_len(numfiles)) {
  image_plot(writeJPEG(mypca2[i, , , ]))
}