Loading Necessary Libraries:

# Setup: Loading necessary libraries
library(jpeg)
library(EBImage)
library(OpenImageR)
library(tidyverse)
library(foreach)

With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

Loading in the Images

# Set the path to the directory containing JPEG files
path <- "~/Desktop/Data-605/Data-605/Homeworks/jpg"

# List all JPEG files in the specified directory
files <- list.files(path, pattern = "\\.jpg", full.names = TRUE)

# Calculate the number of JPEG files
num <- length(files)

# Define image dimensions
height <- 1200
width <- 2500

# Set the scaling factor
scale <- 20

# Calculate new dimensions after scaling
new_height <- height / scale
new_width <- width / scale

Load the Data into an Array and Vectorize

Since the images are very large, we resize each image according to a chosen scale and then load each image into an array of scaled dimension.

# Create an array to store resized images
array_a <- array(rep(0, num * new_height * new_width * 3), dim = c(num, new_height, new_width, 3))

# Resize and store each image in the array
for (i in 1:num) {
    temp <- resizeImage(readJPEG(files[i]), new_height, new_width)
    array_a[i, , , ] <- array(temp, dim = c(1, new_height, new_width, 3))
}

We then create a matrix of the RGB components of each image by looping through the array of scaled images. Within the loop, the R, G, and B components of each image are converted to vectors. These vectors are then concatenated and transposed and added into the matrix of images.

# Flatten the array and store pixel values in a data frame
flat <- matrix(0, num, prod(dim(array_a)))

for (i in 1:num) {
    r <- as.vector(array_a[i, , , 1])
    g <- as.vector(array_a[i, , , 2])
    b <- as.vector(array_a[i, , , 3])
    flat[i, ] <- t(c(r, g, b))
}

# Convert the flattened matrix to a data frame
shoes <- as.data.frame(t(flat))

Creating a Function to Plot the Images

# Define a function to plot JPEG images
plot_jpeg <- function(path, add = FALSE) {
    jpg <- readJPEG(path, native = TRUE)
    res <- dim(jpg)[2:1]
    if (!add) {
        plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
            xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n", xlab = "", ylab = "",
            bty = "n")
    }
    rasterImage(jpg, 1, 1, res[1], res[2])
}

# Plot resized images
par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))
for (i in 1:num) {
    plot_jpeg(writeJPEG(array_a[i, , , ]))
}

Standardize the Pixel Values

# Standardize the pixel values
scaled <- scale(shoes, center = TRUE, scale = TRUE)

# Extract mean and standard deviation of standardized pixel values
mean.shoe <- attr(scaled, "scaled:center")
std.shoe <- attr(scaled, "scaled:scale")

Calculate the Correlation Matrix

# Calculate the correlation matrix
Sigma_ <- cor(scaled)

Compute Eigenvalues of the Correlation Matrix

# Compute eigenvalues of the correlation matrix
myeigen <- eigen(Sigma_)
eigenvalues <- myeigen$values
eigenvalues
##  [1] 11.61745316  1.70394885  0.87429472  0.47497591  0.33549455  0.28761630
##  [7]  0.24989310  0.21531927  0.17872717  0.16921863  0.15382298  0.14492412
## [13]  0.14248337  0.12123246  0.11947244  0.11496463  0.09615834

extract Eigenvectors of the Correlation Matrix:

# Extract eigenvectors of the correlation matrix
eigenvectors <- myeigen$vectors

# Display the first few rows of eigenvectors in a nice table format
knitr::kable(head(eigenvectors), format = "markdown")
0.2525635 -0.0575731 -0.1386619 0.3612579 0.3367958 0.0771434 0.5101649 0.4590323 0.1004874 -0.0066637 0.0399923 0.2091592 0.0802011 -0.0279929 0.1473921 0.3173218 0.0876285
0.2568621 0.2285982 -0.0981094 0.2282754 -0.0291860 0.1245893 0.1998114 0.1230211 -0.2442517 0.0694919 -0.0229789 -0.2066317 0.2766962 0.0141970 -0.1019089 -0.7341851 -0.1212779
0.1969535 -0.3465766 -0.2315417 0.6676894 -0.1570505 0.2328825 -0.4447300 -0.2179298 0.0839843 -0.0133999 -0.0233552 -0.0477726 -0.0002179 0.0037787 -0.0300553 0.0703296 0.0396419
0.2402368 0.3049035 -0.1377157 -0.1120160 -0.3214305 0.3240779 0.0642133 -0.1110570 -0.0661180 -0.0363408 0.3765760 0.4259917 -0.2934239 -0.2116585 -0.1245587 -0.0376213 0.3408931
0.2538292 0.2390470 -0.0601217 -0.0198136 0.3055220 -0.1282683 -0.2885175 0.0737153 0.1689848 0.2884454 0.2680998 -0.1259732 -0.3649987 0.4875641 0.2938786 -0.1177442 0.0726146
0.2082231 -0.3477848 -0.4348510 -0.3317499 0.0020995 -0.1940209 0.0210512 0.0688897 0.1548276 -0.2519525 -0.2612493 -0.1494615 -0.0409279 -0.0791256 0.1059181 -0.2341891 0.4915866

Read and Plot a Sample JPEG Image:

# Read and plot a sample JPEG image
testing_img <- readJPEG(files[5])
plot(1:2, type = "n", main = "")
rasterImage(testing_img, 1, 1, 2, 2)

Perform PCA on Standardized Pixel Values

# Perform PCA on standardized pixel values
scaling <- diag(eigenvalues[1:5]^(-1/2)) / (sqrt(nrow(scaled) - 1))
eigenshoes <- scaled %*% eigenvectors[, 1:5] %*% scaling

# Visualize the fourth eigenshoe
par(mfrow = c(2, 3))
imageShow(array(eigenshoes[, 5], c(new_height, new_width, 3)))

Calculate Cumulative Variance Explained by Each Eigenshoe:

# Calculate cumulative variance explained by each eigenshoe
cumulative_variance <- cumsum(myeigen$values) / sum(myeigen$values)
cumulative_variance
##  [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
##  [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000

Determine the Number of Principal Components Explaining at Least 80% of the Variance:

# Determine the number of principal components explaining at least 80% of the variance
num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp
## [1] 3

Plot Cumulative Variance Explained by Eigenshoes:

# Plot cumulative variance explained by eigenshoes
ggplot(as.data.frame(cumulative_variance), aes(x = 1:num, cumulative_variance)) +
    geom_line() + geom_point() + labs(x = "Number of eigenshoes", y = "Cumulative Variance") +
    scale_x_continuous(breaks = seq(1, 17, by = 2)) + theme_minimal()