# Set the default chunk option echo = TRUE for the chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
#libraries
library(EBImage)
library(jpeg)
With the attached data files, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.
Eigenimagery is the method of analyzing an image or set of images to recognize key features. Its most common application is eigenfaces, which uses the concept of eigenimagery to build a face recognition system: the system is fed a large collection of face images, extracts the key features of those images, and can then use them to detect faces in new images or other media.
For this project, we will focus on the 17 data files in the provided jpg folder, all of which are images of running shoes.
# Image geometry constants; the code below works with arrays of
# height / scale by width / scale values per colour channel.
height <- 1200
width <- 2500
scale <- 20
# Draw a JPEG image on the current graphics device.
#
# path: passed straight to readJPEG(); the calls in this file hand it the
#       result of writeJPEG().
# add:  if FALSE (the default), first open an empty, axis-free plot whose
#       limits match the image resolution; if TRUE, draw onto the plot that
#       is already open.
plot_jpeg <- function(path, add = FALSE) {
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) { # initialize an empty plot area if add == FALSE
    plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
         xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n",
         xlab = "", ylab = "", bty = "n")
  }
  rasterImage(jpg, 1, 1, res[1], res[2])
}
I placed my jpg folder in the same directory as this R Markdown file, so the path is simply `jpg/`. This step reads all the images in the jpg folder, resizes them, and adds them to an array.
# setup
num <- 20 # not referenced in the code below; kept from the original script
# Anchor the pattern so only names ending in ".jpg" are picked up
# (the unanchored form would also match e.g. "shoe.jpg.bak").
files <- list.files(path = "jpg/", pattern = "\\.jpg$")
# Initialize a 4D array of zeros with dimensions
# (image index, height / scale, width / scale, 3 colour channels).
im <- array(0, dim = c(length(files), height / scale, width / scale, 3))
# seq_along() makes the loop a no-op when no files are found, whereas
# 1:length(files) would iterate over c(1, 0) and try to read "jpg/NA".
for (i in seq_along(files)) {
  # define file to be read
  tmp <- paste0("jpg/", files[i])
  # read the file and resize it to the target dimensions
  temp <- EBImage::resize(readJPEG(tmp), height / scale, width / scale)
  # assign to the array
  im[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, 3))
}
This step flattens each image stored in the array `im` into a single vector (its red, green, and blue channels concatenated). The end goal is a data frame.
# Flatten every image in `im` into one row of `flat`, then transpose into a
# data frame with one column per image.
n_images <- dim(im)[1]
# Number of values in a single image (all dimensions except the image index).
# The original used prod(dim(im)), which also multiplies in the image count
# and made every row n_images times too long, filled by silent recycling.
values_per_image <- prod(dim(im)[-1])
flat <- matrix(0, n_images, values_per_image)
for (i in seq_len(n_images)) {
  # The images are already held in `im`, so nothing is re-read from disk.
  r <- as.vector(im[i, , , 1])
  g <- as.vector(im[i, , , 2])
  b <- as.vector(im[i, , , 3])
  flat[i, ] <- c(r, g, b)
}
shoes <- as.data.frame(t(flat))
par(mfrow = c(3, 3)) # set graphics to 3 x 3 table
par(mai = c(.3, .3, .3, .3)) # set margins
# Plot every resized image held in `im`. The loop bound comes from the array
# itself rather than a hard-coded 17, so it stays valid if the number of
# JPEGs in the folder changes.
for (i in seq_len(dim(im)[1])) {
  plot_jpeg(writeJPEG(im[i, , , ]))
}
# Restate the image geometry used to size the reshaped data.
height <- 1200
width <- 2500
scale <- 20
# Reshape the 4D image array into a matrix with one row per image.
newdata <- array(im, dim = c(length(files), height * width * 3 / scale^2))
# PCA on the transposed matrix (one column per image), using the
# correlation matrix and keeping the scores.
mypca <- princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)
sum(mypca$sdev^2 / sum(mypca$sdev^2)) # verify that sum of variance=1
## [1] 1
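To account for 80% of the variability we need the first three components: the first two together explain about 79.4%, just short of the threshold, while the first three explain about 84.5%.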
# Proportion of the total variance carried by each principal component.
mycomponents <- mypca$sdev^2 / sum(mypca$sdev^2)
# Share of variance explained by the first two components.
sum(head(mycomponents, 2))
## [1] 0.7940449
# Share of variance explained by the first three components.
sum(head(mycomponents, 3))
## [1] 0.8451073
# Transpose the PCA scores so that each row holds one component, then fold
# each row back into image shape (height / scale, width / scale, 3 channels).
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(length(files), height / scale, width / scale, 3)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# NOTE(review): PCA scores are not confined to [0, 1]; confirm how writeJPEG
# treats out-of-range values, or rescale before plotting.
# Plot every eigenshoe; the bound is taken from the array instead of a
# hard-coded 17 so it follows the number of components actually present.
for (i in seq_len(dim(mypca2)[1])) {
  plot_jpeg(writeJPEG(mypca2[i, , , ], quality = 1, bg = "white"))
}
Below we check the cumulative proportion of variability explained as each successive eigenshoe is added.
# Proportion of variance for the first 17 components, rounded to three
# decimals, followed by the running total across components.
a <- round(mypca$sdev[seq_len(17)]^2 / sum(mypca$sdev^2), digits = 3)
cumsum(a)
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 0.693 0.794 0.845 0.872 0.891 0.907 0.921 0.933 0.943 0.952
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## 0.960 0.968 0.976 0.983 0.989 0.995 1.000