With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

Import Libraries

library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(foreach)
library(jpeg)
library(OpenImageR)
library(BiocManager) 
library(EBImage)
## 
## Attaching package: 'EBImage'
## The following objects are masked from 'package:OpenImageR':
## 
##     readImage, writeImage

There are 17 shoes images in the jpg folder.

files=list.files("/Users/chunjienan/Desktop/DATA605 Fundamentals of computational mathmatics/week4/jpg",pattern="\\.jpg")[1:17] 

Set shoes image function

height=1200; width=2500;scale=20
plot_jpeg = function(path, add=FALSE)
{ jpg = readJPEG(path, native=T) 
  res = dim(jpg)[2:1] 
  if (!add) 
    plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]),asp=1,type='n',xaxs='i',yaxs='i',xaxt='n',yaxt='n',xlab='',ylab='',bty='n')
  rasterImage(jpg,1,1,res[1],res[2])
}

Load the data, resize and scale the images.

images=array(rep(0,length(files)*height/scale*width/scale*3), dim=c(length(files), height/scale, width/scale,3))

for (i in 1:17){
  temp=resize(readJPEG(paste0("/Users/chunjienan/Desktop/DATA605 Fundamentals of computational mathmatics/week4/jpg/", files[i])),height/scale, width/scale)
  images[i,,,]=array(temp,dim=c(1, height/scale, width/scale,3))}

Vectorize

mat=matrix(0, 17, prod(dim(images))) 
for (i in 1:17) {
  imp <- readJPEG(paste0("/Users/chunjienan/Desktop/DATA605 Fundamentals of computational mathmatics/week4/jpg/", files[i]))
  r=as.vector(images[i,,,1]); g=as.vector(images[i,,,2]);b=as.vector(images[i,,,3])
  mat[i,] <- t(c(r, g, b))
}
shoes=as.data.frame(t(mat))

Plot the shoes

par(mfrow=c(3,4))
par(mai=c(.3,.3,.3,.3))
for (i in 1:17){ 
plot_jpeg(writeJPEG(images[i,,,]))
}

Structure of Eigencomponents

scaled=scale(shoes, center = TRUE, scale = TRUE)
mean.shoe=attr(scaled, "scaled:center") 
std.shoe=attr(scaled, "scaled:scale") 
dim(scaled)
## [1] 382500     17
str(scaled)
##  num [1:382500, 1:17] 0.651 0.651 0.651 0.651 0.651 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
##  - attr(*, "scaled:center")= Named num [1:17] 0.867 0.755 0.909 0.779 0.762 ...
##   ..- attr(*, "names")= chr [1:17] "V1" "V2" "V3" "V4" ...
##  - attr(*, "scaled:scale")= Named num [1:17] 0.205 0.343 0.194 0.327 0.328 ...
##   ..- attr(*, "names")= chr [1:17] "V1" "V2" "V3" "V4" ...

Correlation Matrix for eigencomponents.

Sigma_=cor(scaled)
dim(Sigma_)
## [1] 17 17
str(Sigma_)
##  num [1:17, 1:17] 1 0.787 0.678 0.654 0.727 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...
##   ..$ : chr [1:17] "V1" "V2" "V3" "V4" ...

Eigencomponents

myeigen=eigen(Sigma_)
cumsum(myeigen$values) / sum(myeigen$values)
##  [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
##  [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000

Findout Engenshoes

The 80% variability is at 2nd position

scaling=diag(myeigen$values[1:2]^(-1/2)) / (sqrt(nrow(scaled)-1))
eigenshoes=scaled%*%myeigen$vectors[,1:2]%*%scaling
imageShow(array(eigenshoes[,2], c(60,125,3)))