Assignment

With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

Sources:

The following code utilizes the code and information from the following sources.

  1. https://rpubs.com/R-Minator/eigenshoes
  2. https://rpubs.com/dherrero12/543854
  3. https://www.r-bloggers.com/2017/01/image-compression-with-principal-component-analysis/
library(jpeg)
library(OpenImageR)
library(imager)
library(EBImage)

Get the dimensions of an image to be used later on.

# Dimensions

image <- readJPEG(paste("jpg/","RC_2500x1200_2014_us_53446.jpg",sep=""))
# Image Dimensions
height <- nrow(image)
width <- ncol(image)

#Scaling Factor
scale <- 20

Display all 17 images of shoes.

# Images 

names <- list.files(path = "jpg/", pattern = "jpg")
num = length(names)

par(mfrow=c(5,4))
par(mai=c(.02,.02,.02,.02))
for(x in names)
{
  i <- readJPEG(paste("jpg/",x,sep=""),native=TRUE)
  plot(0:1,0:1,type="n",ann=FALSE,axes=FALSE)
  rasterImage(i,0,0,1,1)
}

All shoe images will be resized by a scale factor of 20 decreasing the dimensions of the images, as seen by the dimensions below. This requires less computational power and resources to perform analysis.

# Resize images

im <- array(rep(0,length(names)*height/scale*width/scale*3), dim=c(length(names), height/scale, width/scale,3))

for (i in 1:num){
  temp=resizeImage(readJPEG(paste0("jpg/", names[i])),
                   height/scale,
                   width/scale)
  if(i==1)
  {
    print(dim(readJPEG(paste0("jpg/", names[i]))))
    print(dim(temp))
  }
  im[i,,,] =array(temp,dim=c(1, height/scale, width/scale,3))}
## [1] 1200 2500    3
## [1]  60 125   3

Collect the RGB vectors of each image in a list and add its transpose to the data matrix. Turn the data matrix into a dataframe.

# Collect RBG vectors of all images
# Create image dataframe
data <- matrix(0, num, prod(dim(im))) 

for (i in 1:length(names)) {

  r=as.vector(im[i,,,1])
  g=as.vector(im[i,,,2])
  b=as.vector(im[i,,,3])
  
  data[i,] <- t(c(r, g, b))
}

images <- as.data.frame(t(data))

Scale the images.

# Scale images
scaled = scale(images, center = TRUE, scale =  TRUE)

Compute the correlation/covariance matrix from the scaled matrix.

cov_m <- cor(scaled)

Compute the eigenvalues and eigenvectors of the covarience matrix.

e <- eigen(cov_m)
eigenvalues <- e$values
eigenvectors <- e$vectors

Calculate the average eigenvalues and find the smallest value that gives 80% variability (threshold).

cum.var <- cumsum(eigenvalues) / sum(eigenvalues)
cum.var
##  [1] 0.6833797 0.7836120 0.8350410 0.8629807 0.8827157 0.8996344 0.9143339
##  [8] 0.9269997 0.9375131 0.9474671 0.9565155 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000
thres <- min(which(cum.var >= 0.80))

Calculate the Eigenimages matrix by multiplying the original matrix by eigenvectors ties the eigenvalues diagonal matrix.

scaling = diag(eigenvalues[1:thres]^(-1/2)) / (sqrt(nrow(scaled)-1))

eigenimages <- scaled%*%eigenvectors[,1:thres]%*%scaling
# Show image that accounts for 80% variability for first image

par(mfrow=c(2,3))
imageShow(array(eigenimages[,1], c(60,125,3)))

Calculate the principal components

# Principal components

im_copy <- im
dim(im_copy) <- c(num,height*width*3/scale^2)
pc <- princomp(t(as.matrix(im_copy)), scores=TRUE, cor=TRUE)
pc2 <- t(pc$scores)
dim(pc2)<- c(num,height/scale,width/scale,3)


par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for(i in 1:num)
{
  x <- readJPEG(writeJPEG(pc2[i,,,], bg="white"))
  plot(0:1,0:1,type="n",ann=FALSE,axes=FALSE)
  rasterImage(x,0,0,1,1)
}

v=round(pc$sdev[1:num]^2/ sum(pc$sdev^2),3)
cumsum(v)
##  Comp.1  Comp.2  Comp.3  Comp.4  Comp.5  Comp.6  Comp.7  Comp.8  Comp.9 Comp.10 
##   0.683   0.783   0.834   0.862   0.882   0.899   0.914   0.927   0.938   0.948 
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17 
##   0.957   0.966   0.974   0.981   0.988   0.995   1.001

New dataset

d <- t(t(eigenimages)%*%scaled)