# Set the knitr chunk option echo = TRUE as the default for every chunk
# (knitr then includes each chunk's source code in the rendered document).
knitr::opts_chunk$set(echo = TRUE)
#libraries
library(EBImage)
library(jpeg)

Problem Set 1 - eigenshoes

With the attached data files, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

Eigenimagery is the method of analyzing an image or set of images to recognize key features. Its most common application is Eigenfaces, which uses the concept of eigenimagery to build a face recognition system: the system is fed a large collection of face images, extracts the key features of those images, and can then use those features to detect faces in new images or other media.

For this project, we will focus on the 17 data files in the provided jpg folder, all of which are images of running shoes.

View shoes function

# Image geometry used throughout: nominal height and width, and the factor
# both are divided by when the images are resized.
height <- 1200
width <- 2500
scale <- 20
# Draw a JPEG image on the current graphics device.
#
# Args:
#   path: passed straight to readJPEG(); either a file name or a raw vector
#         holding JPEG-encoded data (e.g. the result of writeJPEG() called
#         without a target).
#   add:  if FALSE (default), first open an empty, axis-free plot whose
#         coordinate range matches the image; if TRUE, draw onto the
#         existing plot.
#
# Returns: the value of rasterImage(); called for its drawing side effect.
plot_jpeg <- function(path, add = FALSE) {
  # Spell out TRUE: the shorthand T is an ordinary variable and can be
  # reassigned.
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) {
    # initialize an empty plot area if add==FALSE
    plot(
      1, 1,
      xlim = c(1, res[1]), ylim = c(1, res[2]),
      asp = 1, type = "n",
      xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n",
      xlab = "", ylab = "", bty = "n"
    )
  }
  rasterImage(jpg, 1, 1, res[1], res[2])
}

Load the data

I placed my jpg folder in the same directory as this RMarkdown file, so the path is simply ‘jpg/’. This step reads every image in the jpg folder, resizes it, and adds it to an array.

#setup
num <- 20
# Anchor the pattern with $ so that only names ending in ".jpg" are listed
# (the unanchored form also matches names such as "shoe.jpg.bak").
files <- list.files(path = "jpg/", pattern = "\\.jpg$")
if (length(files) == 0) {
  stop("No .jpg files found in 'jpg/'", call. = FALSE)
}

# Initialize a 4D array of zeros with dimensions
# (image index, height / scale, width / scale, 3 colour channels).
im <- array(
  0,
  dim = c(length(files), height / scale, width / scale, 3)
)

# seq_along() (rather than 1:length(files)) never yields the c(1, 0) sequence.
for (i in seq_along(files)) {
  # define file to be read
  tmp <- file.path("jpg", files[i])
  # read the file and resize it to the common target size
  temp <- EBImage::resize(readJPEG(tmp), height / scale, width / scale)
  # assign to the array
  im[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, 3))
}

Vectorize

This step flattens each image in the array `im` into a single vector. The end goal is a data frame with one column per image.

# Flatten every image into one row of a matrix, then transpose into a data
# frame with one column per image.
#
# The row length is the number of values in ONE image, prod(dim(im)[-1]).
# Using prod(dim(im)) would also multiply in the image count, and R would
# then silently recycle each image across the over-long row.
n_images <- dim(im)[1]
flat <- matrix(0, n_images, prod(dim(im)[-1]))
for (i in seq_len(n_images)) {
  # as.vector() on the 3D slice yields all red values, then all green,
  # then all blue -- the same order as c(r, g, b).
  flat[i, ] <- as.vector(im[i, , , ])
}
shoes <- as.data.frame(t(flat))

Plots

par(mfrow = c(3, 3)) # set graphics to 3 x 3 table
par(mai = c(.3, .3, .3, .3)) # set margins
# Plot every image stored in im; taking the count from the array avoids a
# subscript error when the folder does not hold exactly 17 jpgs.
for (i in seq_len(dim(im)[1])) {
  # writeJPEG() without a target returns the encoded image as a raw vector,
  # which plot_jpeg() passes on to readJPEG().
  plot_jpeg(writeJPEG(im[i, , , ]))
}

Generate Principal Components

height <- 1200
width <- 2500
scale <- 20
# Reshape the 4D image array into a matrix with one row per image and one
# column per pixel-channel value.
newdata <- array(
  im,
  dim = c(length(files), height * width * 3 / scale^2)
)
# After transposing, each image is a column (variable) for princomp();
# cor = TRUE bases the PCA on the correlation matrix.
mypca <- princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)
sum(mypca$sdev^2 / sum(mypca$sdev^2)) # verify that sum of variance=1
## [1] 1

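To account for 80% of the variability we need the first three components: the first two explain 79.4%, just short of the target, while the first three explain 84.5%.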

# Proportion of the total variance carried by each principal component.
mycomponents <- prop.table(mypca$sdev^2)
sum(head(mycomponents, 2))
## [1] 0.7940449
sum(head(mycomponents, 3))
## [1] 0.8451073

Generate Eigenshoes

# Transpose the PCA scores so that each row holds one component, then fold
# the rows back into a 4D array shaped like the image array
# (component, height / scale, width / scale, 3 colour channels).
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(length(files), height / scale, width / scale, 3)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# Plot one eigenshoe per component; the count comes from the array rather
# than a hard-coded 17.
for (i in seq_len(dim(mypca2)[1])) {
  plot_jpeg(writeJPEG(mypca2[i, , , ], quality = 1, bg = "white"))
}

Below we check the cumulative proportion of variance explained as each successive eigenshoe is added.

# Proportion of variance for the first 17 components, rounded to three
# decimals, followed by its running total.
a <- round((mypca$sdev^2 / sum(mypca$sdev^2))[1:17], 3)
cumsum(a)
##  Comp.1  Comp.2  Comp.3  Comp.4  Comp.5  Comp.6  Comp.7  Comp.8  Comp.9 Comp.10 
##   0.693   0.794   0.845   0.872   0.891   0.907   0.921   0.933   0.943   0.952 
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17 
##   0.960   0.968   0.976   0.983   0.989   0.995   1.000