With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
library(foreach)
library(iterators)
library(jpeg) #for readJPEG()
library(EBImage) #for the resize() function
# Install EBImage from Bioconductor only when it is not already available,
# so that re-running the script does not trigger an install/update check
# every time.
if (!requireNamespace("EBImage", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("EBImage")
}
## Bioconductor version 3.18 (BiocManager 1.30.22), R 4.3.2 (2023-10-31 ucrt)
## Warning: package(s) not installed when version(s) same as or greater than current; use
##   `force = TRUE` to re-install: 'EBImage'
## Installation paths not writeable, unable to update packages
##   path: C:/Program Files/R/R-4.3.2/library
##   packages:
##     cluster, foreign, lattice, MASS, Matrix, mgcv, nlme, rpart, survival
## Old packages: 'cli', 'digest', 'rlang', 'xfun'
# List the JPEG files in the image folder (file names only, no directory).
# The pattern is anchored with `$` so that only names ending in ".jpg" match;
# the unanchored form would also accept names such as "shoe.jpg.bak".
files <- list.files(
  path = "C:/Users/Uzma/OneDrive/Desktop/Documents/jpg",
  pattern = "\\.jpg$"
)
files
##  [1] "RC_2500x1200_2014_us_53446.jpg" "RC_2500x1200_2014_us_53455.jpg"
##  [3] "RC_2500x1200_2014_us_53469.jpg" "RC_2500x1200_2014_us_53626.jpg"
##  [5] "RC_2500x1200_2014_us_53632.jpg" "RC_2500x1200_2014_us_53649.jpg"
##  [7] "RC_2500x1200_2014_us_53655.jpg" "RC_2500x1200_2014_us_53663.jpg"
##  [9] "RC_2500x1200_2014_us_53697.jpg" "RC_2500x1200_2014_us_54018.jpg"
## [11] "RC_2500x1200_2014_us_54067.jpg" "RC_2500x1200_2014_us_54106.jpg"
## [13] "RC_2500x1200_2014_us_54130.jpg" "RC_2500x1200_2014_us_54148.jpg"
## [15] "RC_2500x1200_2014_us_54157.jpg" "RC_2500x1200_2014_us_54165.jpg"
## [17] "RC_2500x1200_2014_us_54172.jpg"

Setting the rescaling parameters and defining a helper function for viewing the images

# Image height and width in pixels, and the divisor applied to both sides
# when the images are shrunk for analysis.
scale <- 20
width <- 2500
height <- 1200
# Draw a JPEG image on the current graphics device.
#
# path: passed straight to jpeg::readJPEG(); elsewhere in this script it is
#       given the result of writeJPEG() rather than a file name.
# add:  if FALSE (the default) a new, empty plot region sized to the image
#       is opened first; if TRUE the image is drawn onto the existing plot.
#
# Returns the value of rasterImage().
plot_Image <- function(path, add = FALSE) {
  # Fail loudly if the package is missing (require() would only warn).
  if (!requireNamespace("jpeg", quietly = TRUE)) {
    stop("Package 'jpeg' is required by plot_Image().", call. = FALSE)
  }
  jpg <- jpeg::readJPEG(path, native = TRUE) # read the image
  res <- dim(jpg)[2:1] # resolution as (x, y): columns first, then rows
  if (!add) {
    # Open an empty plot whose coordinate system matches the image size.
    plot(1, 1,
      xlim = c(1, res[1]), ylim = c(1, res[2]), # limits set by image size
      asp = 1, # aspect ratio
      type = "n", # draw nothing yet
      xaxs = "i", yaxs = "i", # no automatic axis padding
      xaxt = "n", yaxt = "n", xlab = "", ylab = "", # no axes or labels
      bty = "n" # no box around the plot
    )
  }
  rasterImage(jpg, 1, 1, res[1], res[2]) # image, xleft, ybottom, xright, ytop
}

Loading the image data into an array

# Size of every image after down-scaling.
img_rows <- height / scale
img_cols <- width / scale

# Pre-allocate a 4-D array of zeros with dimensions
# (number of images) x (height / scale) x (width / scale) x 3 colour channels.
shoeIMG <- array(0, dim = c(length(files), img_rows, img_cols, 3))

# seq_along() instead of 1:length(files): when no files were found the loop
# is skipped, rather than iterating over c(1, 0) and failing on a missing file.
for (i in seq_along(files)) {
  # full path of the file to be read
  img_path <- paste0("C:/Users/Uzma/OneDrive/Desktop/Documents/jpg/", files[i])
  # read the file and shrink it to the target size
  resized <- EBImage::resize(readJPEG(img_path), img_rows, img_cols)
  # store it in slot i of the array
  shoeIMG[i, , , ] <- array(resized, dim = c(1, img_rows, img_cols, 3))
}

Plotting the first eight shoe images in a 3 x 3 grid

par(mfrow = c(3, 3)) # lay the plots out on a 3 x 3 grid
par(mai = c(.3, .3, .3, .3)) # set margins
# Plot the first eight images, or fewer if fewer were loaded; capping the
# count avoids a subscript-out-of-bounds error on small image sets.
for (i in seq_len(min(8, dim(shoeIMG)[1]))) {
  plot_Image(writeJPEG(shoeIMG[i, , , ]))
}

### Principal Component Analysis:

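Preparing the data by reshaping the 4-D array into a two-dimensional matrix with one row per image (the standardization itself is done by `cor=TRUE` in the PCA call below)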

# Same image geometry as used when the images were loaded.
scale <- 20
width <- 2500
height <- 1200

# Copy the image array and flatten it to two dimensions: one row per image,
# one column per pixel/channel value.
scaled_Data <- shoeIMG
dim(scaled_Data) <- c(length(files), height * width * 3 / scale^2)
dim(scaled_Data)
## [1]    17 22500

Calculating the principal components with `scores` and `cor` (correlation) both set to TRUE

where the scores represent the eigenvectors (here the eigenimages, because the transposed matrix treats pixels as observations)

# PCA on the transposed matrix, so the images are the variables and the
# pixel/channel values are the observations. cor = TRUE bases the analysis
# on the correlation matrix; scores = TRUE keeps the component scores.
pca_Result <- princomp(
  t(as.matrix(scaled_Data)),
  cor = TRUE,
  scores = TRUE
)

pca_Result
## Call:
## princomp(x = t(as.matrix(scaled_Data)), cor = TRUE, scores = TRUE)
## 
## Standard deviations:
##    Comp.1    Comp.2    Comp.3    Comp.4    Comp.5    Comp.6    Comp.7    Comp.8 
## 3.4319009 1.3118000 0.9316975 0.6809679 0.5683219 0.5255886 0.4877556 0.4528049 
##    Comp.9   Comp.10   Comp.11   Comp.12   Comp.13   Comp.14   Comp.15   Comp.16 
## 0.4060420 0.3924175 0.3791956 0.3684830 0.3602187 0.3374253 0.3241916 0.3181866 
##   Comp.17 
## 0.2808942 
## 
##  17  variables and  22500 observations.

Verifying that the proportions of variance sum to 1

# Express each component's variance (sdev^2) as a share of the total and
# add the shares up; the result should be 1.
variance <- sum(prop.table(pca_Result$sdev^2))

variance
## [1] 1

Finding the number of components that account for 80% of the variability

The first 3 components account for at least 80% of the variability (about 84.5%)

# Share of the total variance explained by each component.
final_Comp <- prop.table(pca_Result$sdev^2)

# Combined share of the first three components.
sum(final_Comp[seq_len(3)])
## [1] 0.8451073

The first 6 components account for at least 90% of the variability (about 90.8%)

# Combined share of the first six components.
sum(final_Comp[seq_len(6)])
## [1] 0.9076338

Transposing the eigenvector scores to reproduce the matrix for replotting

These are the most important images where the first image accounts for the most variability among others.

# Transpose the scores so that each row holds one component, then fold every
# row back into image shape: (component) x (height / scale) x (width / scale)
# x 3 colour channels.
pca_result2 <- t(pca_Result$scores)
n_eigen <- nrow(pca_result2) # number of components, taken from the data
dim(pca_result2) <- c(n_eigen, height / scale, width / scale, 3)
par(mfrow = c(5, 5))
par(mai = c(.001, .001, .001, .001))
# Plot every eigenshoe; the count comes from the PCA result instead of a
# hard-coded 17.
# NOTE(review): the scores are not confined to [0, 1], and writeJPEG()
# documents that values outside that range are clipped. Confirm that this
# is the intended rendering.
for (i in seq_len(n_eigen)) {
  plot_Image(writeJPEG(pca_result2[i, , , ], quality = 1, bg = "white"))
}