**With the provided data file, build and visualize eigenimagery that
accounts for 80% of the variability. Provide full R code and
discussion.**
Load packages:
library(imager)
library(jpeg)
library(EBImage)
library(recolorize)
library(OpenImageR)
library(stats)
Read the image files into R:
# Locate the image files to analyse.
# NOTE(review): the folder below is machine-specific; adjust it before running
# on another computer.
filepath <- "/Users/joshiden/Documents/Classes/CUNY SPS/Fall 2022/DATA 605 /Week 4/jpg/"
# Keep only JPEG files so that any stray non-image file in the folder cannot
# break the readJPEG() calls further down.
files <- list.files(filepath, pattern = "\\.jpe?g$", ignore.case = TRUE)
num <- length(files) # number of image files found
print(files)
## [1] "RC_2500x1200_2014_us_53446.jpg" "RC_2500x1200_2014_us_53455.jpg"
## [3] "RC_2500x1200_2014_us_53469.jpg" "RC_2500x1200_2014_us_53626.jpg"
## [5] "RC_2500x1200_2014_us_53632.jpg" "RC_2500x1200_2014_us_53649.jpg"
## [7] "RC_2500x1200_2014_us_53655.jpg" "RC_2500x1200_2014_us_53663.jpg"
## [9] "RC_2500x1200_2014_us_53697.jpg" "RC_2500x1200_2014_us_54018.jpg"
## [11] "RC_2500x1200_2014_us_54067.jpg" "RC_2500x1200_2014_us_54106.jpg"
## [13] "RC_2500x1200_2014_us_54130.jpg" "RC_2500x1200_2014_us_54148.jpg"
## [15] "RC_2500x1200_2014_us_54157.jpg" "RC_2500x1200_2014_us_54165.jpg"
## [17] "RC_2500x1200_2014_us_54172.jpg"
Preview the first image:
# Sanity check: decode the first listed JPEG and display it.
sample_img <- jpeg::readJPEG(paste0(filepath, files[1]))
OpenImageR::imageShow(sample_img)
Set the image parameters:
# Image geometry and down-sampling settings used by the rest of the script.
width <- 2500  # source image width in pixels (file names read "2500x1200")
height <- 1200 # source image height in pixels
channels <- 3  # colour channels per pixel
scale <- 20    # divisor applied to width and height when shrinking images
Load the data into an array using the established image parameters:
# Load every image, shrink it by `scale`, and store it in a 4-D array indexed
# as [image, row, column, channel].
# The channel count comes from `channels` rather than a repeated literal 3, and
# seq_len() keeps the loop from running when no files were found.
im <- array(0, dim = c(num, height / scale, width / scale, channels))
for (i in seq_len(num)) {
  file <- paste0(filepath, files[i])
  # Resize the first two array dimensions to height/scale and width/scale.
  temp <- EBImage::resize(readJPEG(file), height / scale, width / scale)
  im[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, channels))
}
dim(im)
## [1] 17 60 125 3
View the structure of the array:
utils::str(im) # compact summary of the image array's type and dimensions
## num [1:17, 1:60, 1:125, 1:3] 1 1 1 1 1 1 1 1 1 1 ...
Plot the shoes (using Professor Fulton’s plot_jpg() function):
# Draw a JPEG image on the current graphics device.
#
# path: the JPEG source handed to readJPEG(); this script passes the output of
#       writeJPEG().
# add:  if FALSE (default), first open an empty, axis-free plot region sized to
#       the image; if TRUE, draw the image onto the plot that is already open.
plot_jpg <- function(path, add = FALSE) {
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) { # initialize an empty plot area if add=FALSE
    plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
         xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n",
         xlab = "", ylab = "", bty = "n")
  }
  # Draw the image in both modes. This call used to sit inside the `if` above,
  # so add = TRUE silently drew nothing.
  rasterImage(jpg, 1, 1, res[1], res[2])
}
# Show the down-sampled shoes in a 3 x 3 grid of panels with slim margins.
par(mfrow = c(3, 3))
par(mai = c(.3, .3, .3, .3))
# seq_len() skips the loop entirely when no images were loaded, whereas 1:num
# would try to index images 1 and 0.
for (i in seq_len(num)) {
  # Encode image i as JPEG data and hand it straight to plot_jpg().
  plot_jpg(writeJPEG(im[i, , , ]))
}
Vectorize the image array:
# Flatten each image into one row of `mat`: all red values, then green, then
# blue (the column-major order of im[i, , , ]).
# The row length is the number of values in ONE image, prod(dim(im)[-1]).
# Using prod(dim(im)) made every row `num` times too long and silently recycled
# each image to fill it.
mat <- matrix(0, num, prod(dim(im)[-1]))
for (i in seq_len(num)) {
  mat[i, ] <- as.vector(im[i, , , ])
}
# One column per shoe, one row per pixel/channel value.
shoes <- as.data.frame(t(mat))
dim(mat)
## [1] 17 22500
Check the shoes dimension:
dim(shoes)
## [1] 22500 17
###
Z-scale the data to mean 0, standard deviation 1:
# Standardise every column of `shoes` (one column per image) to mean 0, sd 1.
scaled <- scale(shoes, scale = TRUE, center = TRUE)
# Keep the centring and scaling vectors that scale() attaches to its result.
mean.shoe <- attr(scaled, "scaled:center") # saving for classification
std.shoe <- attr(scaled, "scaled:scale") # saving for classification...later
Calculate and store the covariance matrix. Because the data have been z-scaled, the covariance matrix is the same as the correlation matrix, so cor() is used:
Sigma_ <- stats::cor(scaled) # image-by-image correlation matrix
Get the Eigencomponents from the Covariance
# Eigen-decompose the correlation matrix, then print the cumulative share of
# the total variance carried by the leading eigenvalues.
myeigen <- eigen(Sigma_)
cumsum(myeigen[["values"]]) / sum(myeigen[["values"]])
## [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
## [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
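The cumulative proportions show that the first eigenvalue accounts for 69.3% of the variance and the first two for 79.4%, just short of the 80% target; adding the third brings the total to 84.5%. The 80% threshold therefore falls between the second and third components, so the first three eigenshoes are kept to account for at least 80% of the variance of the 17 shoes.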
# Per-component normalisation: 1 / sqrt(eigenvalue), further divided by
# sqrt(n - 1), for the three leading components.
scaling <- diag(myeigen[["values"]][1:3]^(-1 / 2)) / (sqrt(nrow(scaled) - 1))
# Project the standardised data onto the three leading eigenvectors and rescale.
eigenshoes <- scaled %*% myeigen[["vectors"]][, 1:3] %*% scaling
View the first shoe:
OpenImageR::imageShow(array(eigenshoes[, 1], dim = c(60, 125, 3))) # first eigenshoe as a 60 x 125 x 3 image
And the second shoe (79.4%):
OpenImageR::imageShow(array(eigenshoes[, 2], dim = c(60, 125, 3))) # second eigenshoe as a 60 x 125 x 3 image
And lastly the third shoe (84.5%):
OpenImageR::imageShow(array(eigenshoes[, 3], dim = c(60, 125, 3))) # third eigenshoe as a 60 x 125 x 3 image
We can also get the eigenshoes by the principal components of the images.
# principal component analysis on the image array
newdata <- im
dim(newdata) <- c(num, height * width * 3 / scale^2) # one row per image
mypca <- princomp(t(as.matrix(newdata)), cor = TRUE, scores = TRUE)
# generate eigenshoes: reshape the component scores back into image-shaped arrays
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(num, height / scale, width / scale, 3)
# lay the panels out on a 5 x 5 grid with almost no margins
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
for (i in seq_len(num)) { # plot the Eigenshoes
  plot_jpg(writeJPEG(mypca2[i, , , ], bg = "white")) # complete without reduction
}