# Folder containing the JPG images to analyse
folder_path <- "/Users/johnnyrodriguez/jpg"

# Full paths of every file in the folder whose name ends in ".jpg".
# list.files() matches case-sensitively by default, so ".JPG" or ".jpeg"
# files are not picked up.
jpg_files <- list.files(
  path = folder_path,
  pattern = "\\.jpg$",
  full.names = TRUE
)

# Fail early with a clear message: everything below indexes the first image
# and sizes its arrays from the number of files found.
if (length(jpg_files) == 0) {
  stop("No .jpg files found in ", folder_path, call. = FALSE)
}

# Read every file into a list of image objects, one element per path and in
# the same order as `jpg_files`. load.image() is called through the imager
# namespace so the script does not depend on library(imager) having been
# attached beforehand.
image_list <- lapply(jpg_files, imager::load.image)

# Plot the first image in the list for a visual check
plot(image_list[[1]])
# Display an image held in a plain numeric array, using the EBImage package.
#
# img_array: array handed to EBImage::Image() as the pixel data of a colour
#   image (callers in this script pass one 3D slice of the 4D image array).
#
# The array is wrapped as an EBImage colour image, rotated by 90 degrees so it
# is shown in the intended orientation, and passed to EBImage::display().
# The value of the function is whatever EBImage::display() returns.
plot_jpg_array <- function(img_array) {
  EBImage::display(
    EBImage::rotate(
      EBImage::Image(data = img_array, colormode = 'Color'),
      90
    )
  )
}
# Parameters for image resizing: every image is resized to
# (height / scale) x (width / scale), i.e. 60 x 125 with the values below.
height <- 1200
width <- 2500
scale <- 20

# Sizes used throughout this section. The loop below reads from `jpg_files`,
# so the image count is taken from that same vector.
n_images <- length(jpg_files)
small_h <- height / scale
small_w <- width / scale

# 4D array holding all resized images: image index x small_h x small_w x 3
# colour channels, pre-filled with zeros.
im <- array(0, dim = c(n_images, small_h, small_w, 3))

# Re-read each JPG from disk, resize it, and store it in slice i of `im`.
# seq_len() keeps the loop from running at all when there are no images.
for (i in seq_len(n_images)) {
  # Read the raw pixel array from the file path
  img <- jpeg::readJPEG(jpg_files[i])
  # Resize to the common target size
  temp <- EBImage::resize(EBImage::as.Image(img), small_h, small_w)
  # array() reshapes the pixel data to a single-image slice; as in any array()
  # call, data shorter than the target is recycled and longer data truncated.
  im[i, , , ] <- array(
    EBImage::imageData(temp),
    dim = c(1, small_h, small_w, 3)
  )
}

# Plot the resized images in a 3x3 grid with small margins
par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))
for (i in seq_len(n_images)) {
  plot_jpg_array(im[i, , , ])
}
# Perform Principal Component Analysis (PCA) on the resized images.
# The geometry is taken from `im` itself rather than re-declaring
# height / width / scale, so it cannot drift from the resizing step.
n_images <- dim(im)[1]
img_dims <- dim(im)[-1]

# Flatten every image into one row: n_images x (pixels * channels)
newdata <- im
dim(newdata) <- c(n_images, prod(img_dims))

# princomp() treats rows as observations and columns as variables, so after
# the transpose each pixel/channel value is an observation and each image is
# a variable; there is one principal component per image.
mypca <- princomp(t(newdata), scores = TRUE, cor = TRUE)

# Proportion of variance explained by each component
mycomponents <- mypca$sdev^2 / sum(mypca$sdev^2)

# Verify that the proportions sum to 1
sum(mycomponents)
## [1] 1

# Share of the variability carried by the first 2 components
# (about 79% in the recorded run)
sum(mycomponents[1:2])
## [1] 0.7940449
# NOTE(review): the recorded output also contained "## [1] 0.8913841",
# presumably from a call with more components that is no longer in the script.

# Transpose the PCA scores (one row per component) and reshape each row back
# to the dimensions of a resized image
mypca2 <- t(mypca$scores)
dim(mypca2) <- c(n_images, img_dims)

# Plot up to the first 17 principal components; capped at the number of
# components available so a smaller image set does not index out of bounds
n_plot <- min(17L, n_images)
par(mfrow = c(5, 5))
par(mai = c(0.001, 0.001, 0.001, 0.001))
for (i in seq_len(n_plot)) {
  # Rescale the principal component to the range [0, 1]
  pc_image <- mypca2[i, , , ]
  pc_range <- range(pc_image)
  pc_span <- pc_range[2] - pc_range[1]
  # A constant component has zero span; show it as all zeros instead of
  # dividing by zero
  rescaled_pc_image <- if (isTRUE(pc_span > 0)) {
    (pc_image - pc_range[1]) / pc_span
  } else {
    pc_image * 0
  }
  # Plot the rescaled principal component
  plot_jpg_array(rescaled_pc_image)
}