# Setup: Loading necessary libraries
library(jpeg)
library(EBImage)
library(OpenImageR)
library(tidyverse)
library(foreach)With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.
# Set the path to the directory containing JPEG files
path <- "~/Desktop/Data-605/Data-605/Homeworks/jpg"
# List all JPEG files in the specified directory
files <- list.files(path, pattern = "\\.jpg", full.names = TRUE)
# Calculate the number of JPEG files
num <- length(files)
# Define image dimensions
height <- 1200
width <- 2500
# Set the scaling factor
scale <- 20
# Calculate new dimensions after scaling
new_height <- height / scale
new_width <- width / scaleSince the images are very large, we resize each image according to a chosen scale and then load each image into an array of scaled dimension.
# Create an array to store resized images
array_a <- array(rep(0, num * new_height * new_width * 3), dim = c(num, new_height, new_width, 3))
# Resize and store each image in the array
for (i in 1:num) {
temp <- resizeImage(readJPEG(files[i]), new_height, new_width)
array_a[i, , , ] <- array(temp, dim = c(1, new_height, new_width, 3))
}We then create a matrix of the RGB components of each image by looping through the array of scaled images. Within the loop, the R, G, and B components of each image are converted to vectors. These vectors are then concatenated and transposed and added into the matrix of images.
# Flatten the array and store pixel values in a data frame
flat <- matrix(0, num, prod(dim(array_a)))
for (i in 1:num) {
r <- as.vector(array_a[i, , , 1])
g <- as.vector(array_a[i, , , 2])
b <- as.vector(array_a[i, , , 3])
flat[i, ] <- t(c(r, g, b))
}
# Convert the flattened matrix to a data frame
shoes <- as.data.frame(t(flat))# Define a function to plot JPEG images
plot_jpeg <- function(path, add = FALSE) {
jpg <- readJPEG(path, native = TRUE)
res <- dim(jpg)[2:1]
if (!add) {
plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n", xlab = "", ylab = "",
bty = "n")
}
rasterImage(jpg, 1, 1, res[1], res[2])
}
# Plot resized images
par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))
for (i in 1:num) {
plot_jpeg(writeJPEG(array_a[i, , , ]))
}# Compute eigenvalues of the correlation matrix
myeigen <- eigen(Sigma_)
eigenvalues <- myeigen$values
eigenvalues## [1] 11.61745316 1.70394885 0.87429472 0.47497591 0.33549455 0.28761630
## [7] 0.24989310 0.21531927 0.17872717 0.16921863 0.15382298 0.14492412
## [13] 0.14248337 0.12123246 0.11947244 0.11496463 0.09615834
# Extract eigenvectors of the correlation matrix
eigenvectors <- myeigen$vectors
# Display the first few rows of eigenvectors in a nice table format
knitr::kable(head(eigenvectors), format = "markdown")| 0.2525635 | -0.0575731 | -0.1386619 | 0.3612579 | 0.3367958 | 0.0771434 | 0.5101649 | 0.4590323 | 0.1004874 | -0.0066637 | 0.0399923 | 0.2091592 | 0.0802011 | -0.0279929 | 0.1473921 | 0.3173218 | 0.0876285 |
| 0.2568621 | 0.2285982 | -0.0981094 | 0.2282754 | -0.0291860 | 0.1245893 | 0.1998114 | 0.1230211 | -0.2442517 | 0.0694919 | -0.0229789 | -0.2066317 | 0.2766962 | 0.0141970 | -0.1019089 | -0.7341851 | -0.1212779 |
| 0.1969535 | -0.3465766 | -0.2315417 | 0.6676894 | -0.1570505 | 0.2328825 | -0.4447300 | -0.2179298 | 0.0839843 | -0.0133999 | -0.0233552 | -0.0477726 | -0.0002179 | 0.0037787 | -0.0300553 | 0.0703296 | 0.0396419 |
| 0.2402368 | 0.3049035 | -0.1377157 | -0.1120160 | -0.3214305 | 0.3240779 | 0.0642133 | -0.1110570 | -0.0661180 | -0.0363408 | 0.3765760 | 0.4259917 | -0.2934239 | -0.2116585 | -0.1245587 | -0.0376213 | 0.3408931 |
| 0.2538292 | 0.2390470 | -0.0601217 | -0.0198136 | 0.3055220 | -0.1282683 | -0.2885175 | 0.0737153 | 0.1689848 | 0.2884454 | 0.2680998 | -0.1259732 | -0.3649987 | 0.4875641 | 0.2938786 | -0.1177442 | 0.0726146 |
| 0.2082231 | -0.3477848 | -0.4348510 | -0.3317499 | 0.0020995 | -0.1940209 | 0.0210512 | 0.0688897 | 0.1548276 | -0.2519525 | -0.2612493 | -0.1494615 | -0.0409279 | -0.0791256 | 0.1059181 | -0.2341891 | 0.4915866 |
# Read and plot a sample JPEG image
testing_img <- readJPEG(files[5])
plot(1:2, type = "n", main = "")
rasterImage(testing_img, 1, 1, 2, 2)# Perform PCA on standardized pixel values
scaling <- diag(eigenvalues[1:5]^(-1/2)) / (sqrt(nrow(scaled) - 1))
eigenshoes <- scaled %*% eigenvectors[, 1:5] %*% scaling
# Visualize the fourth eigenshoe
par(mfrow = c(2, 3))
imageShow(array(eigenshoes[, 5], c(new_height, new_width, 3)))
# Calculate cumulative variance explained by each eigenshoe
cumulative_variance <- cumsum(myeigen$values) / sum(myeigen$values)
cumulative_variance## [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
## [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000
# Determine the number of principal components explaining at least 80% of the variance
num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp## [1] 3
# Plot cumulative variance explained by eigenshoes
ggplot(as.data.frame(cumulative_variance), aes(x = 1:num, cumulative_variance)) +
geom_line() + geom_point() + labs(x = "Number of eigenshoes", y = "Cumulative Variance") +
scale_x_continuous(breaks = seq(1, 17, by = 2)) + theme_minimal()