With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.
library(stringr)
library(OpenImageR)
library(recolorize)
##
## Attaching package: 'recolorize'
## The following object is masked from 'package:OpenImageR':
##
## readImage
library(ggplot2)
filepath <- paste0(getwd(),'/jpg')
list_of_files <- list.files(path='./jpg',pattern = ".jpg$")
picpath <- file.path(filepath,list_of_files[1])
img <- readImage(picpath)
#First Example Shoe Image
imageShow(img)
img_mat <- matrix(0,ncol=length(list_of_files),prod(dim(img)))
#Loading Image Guide: https://cran.r-project.org/web/packages/OpenImageR/vignettes/The_OpenImageR_package.html
#Helpful for understanding components of loaded img array and how to use them: https://cran.r-project.org/web/packages/recolorize/vignettes/step01_loading.html
for (i in 1:length(list_of_files)){
tmp_img <- file.path(filepath,list_of_files[i])
func_img <- readImage(tmp_img)
r_img <- as.vector(func_img[,,1])
g_img <- as.vector(func_img[,,2])
b_img <- as.vector(func_img[,,3])
img_mat[,i] <- c(r_img,g_img,b_img)
}
scaled_imgs <- scale(as.data.frame(img_mat),center=TRUE,scale=TRUE)
shoe_mean <- attr(scaled_imgs, "scaled:center")
shoe_sd <- attr(scaled_imgs, "scaled:scale")
The correlation matrix is used to obtain the variance between each image vector which can then be used to determine the principle components that make up the variance across all shoes by finding the eigenspace.
cor_mat <- cor(scaled_imgs)
eigen_data <- eigen(cor_mat)
pc_var <- cumsum(eigen_data$values)/sum(eigen_data$values)
var_threshold <- head(eigen_data$values/sum(eigen_data$values),which(pc_var>0.8)[1])
ggplot(as.data.frame(pc_var),aes(x=seq(1:length(pc_var)),y=pc_var)) +
geom_line() +
theme(axis.text.x = element_text(angle = 90, vjust = 1, hjust=1)) +
geom_hline(yintercept=0.8,linetype='dashed') +
xlab("Principal Component") +
ylab("Variance Explained") +
labs(title='Cumulative Variance Explained by PCs',subtitle = '>80% variance explained by the principal components')
ggplot(as.data.frame(var_threshold),aes(x=paste0("PC",seq(1:length(var_threshold))),y=var_threshold)) +
geom_bar(stat='identity') +
xlab("Principal Component") +
ylab("Variance Explained") +
labs(title='Scree Plot')
\[AW^TV = U\] Source: https://www.geeksforgeeks.org/singular-value-decomposition-svd/ \[A\]: Original scaled matrix of shoe image vectors \[V\]: matrix containing the orthonormal eigenvectors of \[A^{T}A\] \[W\]: transpose of nxn diagonal matrix of the square roots of the eigenvalues of \[A^{T}A\]
w_t <- eigen_data$values[1:which(pc_var>0.8)[1]]^(-1/2)/(sqrt(nrow(scaled_imgs)-1))
eigenshoes <- scaled_imgs %*% eigen_data$vectors[,1:which(pc_var>0.8)[1]] %*% w_t
#reshaping eigenshoe single vector into jpg array format using initial loaded image dimensions
imageShow(array(eigenshoes[,1],dim(img)))