Prep Environment Package and Library Load

#install.packages("imager")
#install.packages("BiocManager") 
#BiocManager::install("EBImage")


# Load the libraries
library(imager)
library(jpeg)
library(EBImage)


Read the image files

# Define the path to the folder containing the JPG images
folder_path <- "/Users/johnnyrodriguez/jpg"

# List all JPG files in the folder; the regular expression only matches
# names ending in '.jpg' (case-sensitive)
jpg_files <- list.files(
  path = folder_path,
  pattern = "\\.jpg$",
  full.names = TRUE
)

# Stop early with a clear message when nothing matched: every later step
# indexes into these files, and `1:length(x)` style loops would otherwise
# run with the indices 1 and 0 on an empty vector
if (length(jpg_files) == 0) {
  stop("No .jpg files found in ", folder_path, call. = FALSE)
}

# Read each file into an image object; lapply builds the whole list in one
# pass instead of growing it element by element
image_list <- lapply(jpg_files, load.image)

# Plot the first image in the list for a visual check
plot(image_list[[1]])


Load images into array

# Display a 3D array as a colour image via EBImage, rotated by 90 degrees
plot_jpg_array <- function(img_array) {
  # Wrap the raw array as a colour EBImage object and rotate it in one step;
  # the 90-degree turn is what the notebook uses to get the pictures upright
  rotated <- EBImage::rotate(
    EBImage::Image(data = img_array, colormode = 'Color'),
    90
  )

  # Hand the rotated image to EBImage's viewer
  EBImage::display(rotated)
}

# Parameters for image resizing: each image is shrunk to
# (height / scale) x (width / scale) before being stored
height <- 1200
width <- 2500
scale <- 20

# Zero-filled 4D array holding the resized images, indexed as
# [image, height / scale, width / scale, colour channel]
im <- array(0, dim = c(length(jpg_files), height / scale, width / scale, 3))

# Resize every image and store it in the 4D array. The loop runs over
# jpg_files because that is the vector actually indexed inside the body, and
# seq_along() makes it a no-op (instead of iterating over 1, 0) when empty.
for (i in seq_along(jpg_files)) {
  # Read the raw pixel data straight from the file
  img <- readJPEG(jpg_files[i])

  # Resize the image and copy its pixel data into slot i of the array
  temp <- EBImage::resize(as.Image(img), height / scale, width / scale)
  im[i, , , ] <- array(temp@.Data, dim = c(1, height / scale, width / scale, 3))
}


Plot the shoes

# Plot the resized images in a 3x3 grid with small margins
par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))

# seq_len() yields an empty sequence when there are no images, whereas
# 1:0 would try to plot the non-existent indices 1 and 0
for (i in seq_len(min(length(image_list), dim(im)[1]))) {
  plot_jpg_array(im[i, , , ])
}


Generate Principal Components

# Perform Principal Component Analysis (PCA) on the images.
# Flatten the 4D image array into a matrix with one row per image and one
# column per pixel/channel value. The shape is taken from `im` itself rather
# than from re-declared height/width/scale constants, so it cannot drift out
# of sync with the array built earlier.
newdata <- im
dim(newdata) <- c(dim(im)[1], prod(dim(im)[-1]))

# princomp() treats columns as variables, so the matrix is transposed to make
# each image a variable; cor = TRUE runs the analysis on the correlation matrix
mypca <- princomp(t(newdata), scores = TRUE, cor = TRUE)

# Sanity check: the normalised component variances sum to 1
sum(mypca$sdev^2 / sum(mypca$sdev^2))
## [1] 1
# Calculate the proportion of variance explained by each component
mycomponents <- mypca$sdev^2 / sum(mypca$sdev^2)

# First 2 components account for about 80% of variability (recorded run)
sum(mycomponents[1:2])
## [1] 0.7940449
# First 5 components account for about 90% of variability (recorded run)
sum(mycomponents[1:5])
## [1] 0.8913841


Eigen Shoes

# Transpose the PCA scores so that each row holds one component, then reshape
# the rows back to the resized image dimensions
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(length(jpg_files), height / scale, width / scale, 3)

# Plot up to the first 17 principal components. The count is capped at the
# number of components actually available so that a smaller image set does
# not trigger a subscript-out-of-bounds error.
n_components <- min(17, dim(mypca2)[1])
par(mfrow = c(5, 5))
par(mai = c(0.001, 0.001, 0.001, 0.001))
for (i in seq_len(n_components)) {

  # Rescale the principal component to the range [0, 1]
  pc_image <- mypca2[i, , , ]
  min_val <- min(pc_image)
  max_val <- max(pc_image)
  if (max_val > min_val) {
    rescaled_pc_image <- (pc_image - min_val) / (max_val - min_val)
  } else {
    # A completely flat component would give 0 / 0 = NaN; show it as black
    rescaled_pc_image <- array(0, dim = dim(pc_image))
  }

  # Plot the rescaled principal component
  plot_jpg_array(rescaled_pc_image)
}