## Week 4, Linear Transformations & Representations

**With the provided data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.**

Setting Up The Data

Load packages:

library(imager)
library(jpeg)
library(EBImage)
library(recolorize)
library(OpenImageR)
library(stats)

Read the image files into R:

# read the image files into R
filepath <- "/Users/joshiden/Documents/Classes/CUNY SPS/Fall 2022/DATA 605 /Week 4/jpg/"
# Keep only JPEG files so stray entries in the folder (for example a macOS
# .DS_Store) are never handed to readJPEG() in the later steps.
files <- list.files(filepath, pattern = "\\.jpe?g$", ignore.case = TRUE)
num <- length(files) # number of image files found
print(files)
##  [1] "RC_2500x1200_2014_us_53446.jpg" "RC_2500x1200_2014_us_53455.jpg"
##  [3] "RC_2500x1200_2014_us_53469.jpg" "RC_2500x1200_2014_us_53626.jpg"
##  [5] "RC_2500x1200_2014_us_53632.jpg" "RC_2500x1200_2014_us_53649.jpg"
##  [7] "RC_2500x1200_2014_us_53655.jpg" "RC_2500x1200_2014_us_53663.jpg"
##  [9] "RC_2500x1200_2014_us_53697.jpg" "RC_2500x1200_2014_us_54018.jpg"
## [11] "RC_2500x1200_2014_us_54067.jpg" "RC_2500x1200_2014_us_54106.jpg"
## [13] "RC_2500x1200_2014_us_54130.jpg" "RC_2500x1200_2014_us_54148.jpg"
## [15] "RC_2500x1200_2014_us_54157.jpg" "RC_2500x1200_2014_us_54165.jpg"
## [17] "RC_2500x1200_2014_us_54172.jpg"

Preview the first image:

# show the first shoe to confirm the files can be read
first_path <- paste0(filepath, files[1])
sample_img <- readJPEG(first_path)
imageShow(sample_img)

Set the image parameters:

# image geometry and downsampling settings
width <- 2500   # source image width (file names read 2500x1200)
height <- 1200  # source image height
channels <- 3   # number of colour planes kept per image
scale <- 20     # divisor applied to both height and width when resizing

Load the data into an array using the established image parameters:

# load the data into an array
# Layout: image index x reduced height x reduced width x colour channel.
small_dim <- c(height / scale, width / scale, channels)
im <- array(0, dim = c(num, small_dim))

# seq_len() yields an empty loop when no files were found; 1:num would
# iterate over c(1, 0) and fail on a missing file name.
for (i in seq_len(num)) {
  file <- paste0(filepath, files[i])
  # shrink each photo by `scale` in both directions before storing it
  temp <- EBImage::resize(readJPEG(file), small_dim[1], small_dim[2])
  im[i, , , ] <- array(temp, dim = c(1, small_dim))
}

dim(im)
## [1]  17  60 125   3

View the structure of the array:

# compact structural summary of the image array
utils::str(im)
##  num [1:17, 1:60, 1:125, 1:3] 1 1 1 1 1 1 1 1 1 1 ...

Plot the shoes (using Professor Fulton’s plot_jpg() function):

#' Draw a JPEG image on the current graphics device
#'
#' @param path Anything readJPEG() accepts as its source; this script passes
#'   the value returned by writeJPEG().
#' @param add If FALSE (the default) an empty, axis-free plot region sized to
#'   the image resolution is opened first. If TRUE the image is drawn onto
#'   whatever plot is already open.
#' @return Called for its side effect of drawing the image.
plot_jpg <- function(path, add = FALSE) {
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) { # initialize an empty plot area if add=FALSE
    plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = 'n',
         xaxs = 'i', yaxs = 'i', xaxt = 'n', yaxt = 'n',
         xlab = '', ylab = '', bty = 'n')
  }
  # Draw in both cases. This call used to sit inside the if-block above, so
  # add = TRUE silently rendered nothing.
  rasterImage(jpg, 1, 1, res[1], res[2])
}

# 3 x 3 grid of panels with 0.3 inch margins on every side
par(mfrow = c(3, 3))
par(mai = c(.3, .3, .3, .3))
# seq_len() keeps the loop empty when there are no images (1:num would not)
for (i in seq_len(num)) {
  # re-encode the downsampled image and hand the result to plot_jpg()
  plot_jpg(writeJPEG(im[i, , , ]))
}

Vectorize the image array:

# Each image contributes one row holding all of its pixel values.
# The row length must exclude the image count: prod(dim(im)) multiplied it
# in, so every image was silently recycled `num` times along its row.
pixels_per_image <- prod(dim(im)[-1])
mat <- matrix(0, num, pixels_per_image)
for (i in seq_len(num)) {
  # flatten channel by channel: all red values, then green, then blue
  red <- as.vector(im[i, , , 1])
  green <- as.vector(im[i, , , 2])
  blue <- as.vector(im[i, , , 3])
  mat[i, ] <- c(red, green, blue)
}
# transpose so that each shoe is a column and each pixel value a row
shoes <- as.data.frame(t(mat))
dim(mat)
## [1]    17 22500

Check the shoes dimension:

dim(shoes)
## [1] 22500    17

###

Get Eigencomponents From Correlation Structure

Z-scale the data to mean 0, standard deviation 1:

# standardise every shoe column and keep the centring/scaling statistics
scaled <- scale(shoes, scale = TRUE, center = TRUE)
mean.shoe <- attr(scaled, which = "scaled:center") # saved for classification
std.shoe <- attr(scaled, which = "scaled:scale")   # saved for classification...later

Calculate and store the correlation matrix of the scaled data. Because every column has been z-scaled to unit variance, this is the same as its covariance matrix.

# pairwise correlations between the shoe columns of the scaled data
Sigma_ <- cor(x = scaled)

Get the Eigencomponents from the Covariance

# eigen-decomposition, then the running share of the eigenvalue total
myeigen <- eigen(Sigma_)
with(myeigen, cumsum(values) / sum(values))
##  [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
##  [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000

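We can see that the first two eigenvalues account for 79.4% of the variance, just short of the 80% target, and that adding the third brings the cumulative share to 84.5%. The 80% mark therefore falls between the second and third eigenshoes: a linear combination of the first two accounts for about 80% of the variance of the 17 shoes, and the first three are needed to exceed it.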

Calculate Eigenshoes

# project onto the first three eigenvectors and rescale each component by
# 1 / sqrt(eigenvalue * (n - 1))
keep <- 1:3
scaling <- diag(myeigen$values[keep]^(-1/2)) / sqrt(nrow(scaled) - 1)
eigenshoes <- scaled %*% myeigen$vectors[, keep] %*% scaling

View the first shoe:

# reshape the first eigenshoe column back into an image; the dimensions come
# from the declared parameters instead of repeating 60, 125 and 3 by hand
imageShow(array(eigenshoes[, 1],
                dim = c(height / scale, width / scale, channels)))

And the second shoe (79.4%):

# reshape the second eigenshoe column back into an image; the dimensions come
# from the declared parameters instead of repeating 60, 125 and 3 by hand
imageShow(array(eigenshoes[, 2],
                dim = c(height / scale, width / scale, channels)))

And lastly the third shoe (84.5%):

# reshape the third eigenshoe column back into an image; the dimensions come
# from the declared parameters instead of repeating 60, 125 and 3 by hand
imageShow(array(eigenshoes[, 3],
                dim = c(height / scale, width / scale, channels)))

Generate the Eigenshoes from the Principal Components

We can also get the eigenshoes by the principal components of the images.

# principal component analysis on the image array
# flatten every image to one row, then transpose so each shoe is a variable
newdata <- im
dim(newdata) <- c(num, height * width * channels / scale^2)
mypca <- princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)

# generate eigenshoes
# one row of scores per component, reshaped back to image dimensions
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(num, height / scale, width / scale, channels)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# seq_len() keeps the loop empty when there are no images (1:num would not)
for (i in seq_len(num)) { # plot the eigenshoes
  plot_jpg(writeJPEG(mypca2[i, , , ], bg = "white")) # complete without reduction
}