With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.
#load libraries
library(imager)
## Loading required package: magrittr
##
## Attaching package: 'imager'
## The following object is masked from 'package:magrittr':
##
## add
## The following objects are masked from 'package:stats':
##
## convolve, spectrum
## The following object is masked from 'package:graphics':
##
## frame
## The following object is masked from 'package:base':
##
## save.image
library(jpeg)
library(BiocManager)
BiocManager::install("EBImage")
## Bioconductor version 3.18 (BiocManager 1.30.22), R 4.3.1 (2023-06-16)
## Warning: package(s) not installed when version(s) same as or greater than current; use
## `force = TRUE` to re-install: 'EBImage'
## Old packages: 'jsonlite', 'xml2'
Make sure the `jpg4` folder is in the current working directory.
# Locate the image folder, expected inside the current working directory.
path <- file.path(getwd(), 'jpg4')
#path- "/users/marjetevucinaj/605/jpg4"
# `pattern` is a regular expression, not a shell glob: anchor on the literal
# ".jpg" extension so that only JPEG files are picked up.
sfiles <- list.files(path = path, pattern = "\\.jpg$", full.names = TRUE)
set.seed(1234)
num <- length(sfiles)
# Fail early with a clear message instead of erroring later inside readJPEG().
if (num == 0) {
  stop("No .jpg files found in ", path, call. = FALSE)
}
print(num) #print number of images and dimensions
## [1] 17
image <- readJPEG(sfiles[1])
image_dimensions <- dim(image)
print(image_dimensions)
## [1] 1200 2500 3
#define dimensions (these match the dimensions printed above)
height <- 1200
width <- 2500
scale <- 20 # down-sampling factor applied to both height and width
channels <- 3
#create array: one slice per image, holding the down-sampled pixels per channel
ar <- array(0, dim = c(num, height / scale, width / scale, channels))
# seq_len() keeps the loop empty-safe (1:num would run over c(1, 0) for num = 0)
for (i in seq_len(num)) {
  file <- sfiles[i] # use the full path directly
  # shrink each image by `scale` in both directions before storing it
  temp <- EBImage::resize(readJPEG(file), height / scale, width / scale)
  ar[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, channels))
}
dim(ar)
## [1] 17 60 125 3
#plot original shoes
# Draw a JPEG as a raster image on the current graphics device.
#
# path: JPEG source handed straight to readJPEG() (a file name, or a raw vector
#       holding encoded JPEG data such as the result of writeJPEG()).
# add:  FALSE (default) first opens a new, axis-free plot sized to the image;
#       TRUE draws the image onto the plot that is already open.
plot_jpg <- function(path, add = FALSE) {
  jpg <- readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) { # initialize an empty plot area if add=FALSE
    plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = 'n',
         xaxs = 'i', yaxs = 'i', xaxt = 'n', yaxt = 'n',
         xlab = '', ylab = '', bty = 'n')
  }
  # Draw the image in both modes. This call used to sit inside the `if`, so
  # add = TRUE produced no output at all.
  rasterImage(jpg, 1, 1, res[1], res[2])
}
# Show the down-sampled originals on a 3 x 3 grid with small margins.
par(mfrow = c(3, 3))
par(mai = c(.3, .3, .3, .3))
# seq_len() keeps the loop empty-safe (1:num would run over c(1, 0) for num = 0)
for (i in seq_len(num)) {
  # writeJPEG() with its default target returns the encoded image as a raw
  # vector, which plot_jpg() decodes and draws
  plot_jpg(writeJPEG(ar[i, , , ]))
}
# make the image array into a matrix: one row per image, with the red, green
# and blue planes laid end to end
# A row must hold exactly one image (height * width * channels values).
# prod(dim(ar)) also multiplied in the number of images, which made every row
# `num` times too long and silently recycled the pixel vector to fill it.
pixels_per_image <- prod(dim(ar)[-1])
m <- matrix(0, num, pixels_per_image)
# The pixels are already in `ar`, so the files are not read from disk again.
for (i in seq_len(num)) {
  red <- as.vector(ar[i, , , 1])
  green <- as.vector(ar[i, , , 2])
  blue <- as.vector(ar[i, , , 3])
  m[i, ] <- c(red, green, blue)
}
# one column per image, one row per pixel value
shoes <- as.data.frame(t(m))
dim(m)
## [1] 17 22500
dim(shoes)
## [1] 22500 17
# Standardise every image column: subtract its mean, divide by its sd.
scaled <- scale(shoes, center = TRUE, scale = TRUE)
# Correlation matrix of the standardised images.
Sigma_ <- cor(scaled)
# Eigenvalues and eigenvectors of that correlation matrix.
myeigen <- eigen(Sigma_)
# Keep the per-image means and standard deviations that scale() removed.
mean.shoe <- attr(scaled, "scaled:center")
std.shoe <- attr(scaled, "scaled:scale")
# Cumulative share of the total variance explained by the leading components.
cumsum(myeigen$values) / sum(myeigen$values)
## [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
## [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
# Keep the leading components: the smallest k whose cumulative share of the
# total variance reaches 80%. The first component has to be included; starting
# at the second one throws away the largest single share of the variance.
cum_var <- cumsum(myeigen$values) / sum(myeigen$values)
k <- which(cum_var >= 0.80)[1]
keep <- seq_len(k)
# scale the data and compute the eigenshoes for the retained eigenvectors
# (nrow = k keeps diag() building a k x k matrix even when k is 1)
scaling <- diag(myeigen$values[keep]^(-1/2), nrow = k) / (sqrt(nrow(scaled) - 1))
eigenshoes <- scaled %*% myeigen$vectors[, keep, drop = FALSE] %*% scaling
# principal component analysis on the image array (one column per image)
newdata <- ar
dim(newdata) <- c(num, height * width * 3 / scale^2)
mypca <- princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)
# generate eigenshoes: reshape the scores so that slice i is component i
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(num, height / scale, width / scale, 3)
# Plot only the k retained components. Looping over all `num` components drew
# every component no matter which eigenvectors had been selected above.
par(mfrow = c(ceiling(k / 5), min(k, 5)))
par(mai = c(.001, .001, .001, .001))
for (i in keep) { #plot the Eigenshoes
  plot_jpg(writeJPEG(mypca2[i, , , ], bg = "white"))
}
# Comparison run: components 14 to 16, which together carry only about 2% of
# the variance according to the cumulative proportions of the eigenvalues.
tail_idx <- 14:16
# scale the data and compute the eigenshoes for those eigenvectors
scaling <- diag(myeigen$values[tail_idx]^(-1/2)) / (sqrt(nrow(scaled) - 1))
eigenshoes2 <- scaled %*% myeigen$vectors[, tail_idx] %*% scaling
# pca on the image array (one column per image)
newdata <- ar
dim(newdata) <- c(num, height * width * 3 / scale^2)
mypca3 <- princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)
# generate eigenshoes: reshape the scores so that slice i is component i
mypca3 <- t(mypca3$scores)
dim(mypca3) <- c(num, height / scale, width / scale, 3)
# Plot only the selected trailing components. Looping over all `num`
# components made this figure identical to any other plot of the same PCA.
par(mfrow = c(1, length(tail_idx)))
par(mai = c(.001, .001, .001, .001))
for (i in tail_idx) { #plot the Eigenshoes
  plot_jpg(writeJPEG(mypca3[i, , , ], bg = "white"))
}
```
After loading the libraries and reading the images from the jpg4 folder, I resized them to reduce their dimensions. I also plotted the original shoes to visualize them. Next, I converted the images into a matrix with one vector per image. I then found the eigenvalues and eigenvectors to identify key patterns in the shoes, and used eigenvectors 2 to 4, with scaling, to generate the eigenshoes, essentially capturing the dominant patterns of the data to ensure more than 80% of the variability. I also attempted to use eigenvectors 14 to 16 to visualize the difference, but the plots look the same, so I'm unsure whether that is because the variability is so high or because I made an error in my code.