library(jpeg)
library(OpenImageR)
library(graphics)
library(tidyverse)
library(reshape2)
library(imager)
Build and visualize eigenimagery that accounts for 80% of the variability
Import the zipped JPEGs and read them into R as an array.
# Extract the archive; the images are then looked up in the "jpg" folder.
unzip("jpg.zip")
# List the JPEGs with their paths relative to the working directory, so the
# files that were listed are the files that get read. The pattern is a regular
# expression (not a glob): the dot is escaped and the match is anchored to the
# end of the file name.
file_list <- list.files("jpg", pattern = "\\.jpg$", full.names = TRUE)
# Fail early with a clear message instead of a subscript error further down.
if (length(file_list) == 0) {
  stop("No .jpg files found in the 'jpg' folder.", call. = FALSE)
}
# Read every file; each element is the numeric array returned by readJPEG().
jpeg_list <- lapply(file_list, readJPEG)
# Stacking with array(unlist(...)) is only valid when every image has the same
# dimensions; otherwise the pixels would be silently misaligned.
image_dims <- dim(jpeg_list[[1]])
same_dims <- vapply(
  jpeg_list,
  function(x) identical(dim(x), image_dims),
  logical(1)
)
if (!all(same_dims)) {
  stop("All images must have identical dimensions.", call. = FALSE)
}
# Stack the images along a new last dimension (one slice per image).
jpeg_array <- array(unlist(jpeg_list),
                    dim = c(image_dims, length(jpeg_list)))
# print dimensions of array
print(dim(jpeg_array))
## [1] 1200 2500 3 17
We can see there are 17 pictures, each 1200 x 2500 pixels with 3 color channels. Now let’s plot these images to see what they look like.
Let’s take a look at the original images.
# Pull the first picture out of the 4-D image stack.
img <- jpeg_array[, , , 1]
# Plot extents in pixel units: columns along x, rows along y.
n_cols <- ncol(img)
n_rows <- nrow(img)
# Open an empty canvas with no axes, labels or box, sized to the image.
plot(
  0,
  type = "n",
  xlim = c(0, n_cols),
  ylim = c(0, n_rows),
  xaxt = "n",
  yaxt = "n",
  xlab = "",
  ylab = "",
  bty = "n"
)
# Paint the picture so that it fills the whole canvas.
graphics::rasterImage(img, 0, 0, n_cols, n_rows)
Now let’s try and get this to loop through all images.
# Arrange the plots on a 3 x 3 grid.
par(mfrow = c(3, 3))
# Use 0.3-inch margins on all four sides of every panel.
par(mai = c(0.3, 0.3, 0.3, 0.3))
# Draw every file in its own panel.
for (i in seq_along(file_list)) {
  # Read the current file from disk.
  img <- jpeg::readJPEG(file_list[[i]])
  # Panel extents in pixel units: columns along x, rows along y.
  n_cols <- ncol(img)
  n_rows <- nrow(img)
  # Empty canvas with no axes, labels or box, sized to the image.
  plot(
    0,
    type = "n",
    xlim = c(0, n_cols),
    ylim = c(0, n_rows),
    xaxt = "n",
    yaxt = "n",
    xlab = "",
    ylab = "",
    bty = "n"
  )
  # Paint the picture so that it fills the panel.
  graphics::rasterImage(img, 0, 0, n_cols, n_rows)
}
I am going to have to resize the jpegs prior to vectorizing them to conserve memory.
# Each side of every image is shrunk by this factor to conserve memory.
shrink_factor <- 20
# Target size (rows, columns), taken directly from the image stack instead of
# from whatever `img` an earlier chunk left behind. Integer division keeps the
# extents whole numbers even when a side is not a multiple of the factor.
output_size <- dim(jpeg_array)[1:2] %/% shrink_factor
n_channels <- dim(jpeg_array)[3]
n_images <- dim(jpeg_array)[4]
# Preallocate the output stack: rows x columns x channels x images.
resized_array <- array(NA_real_,
                       dim = c(output_size, n_channels, n_images))
# Loop through each image in the array, resize it, and store it in the output
# array. seq_len() makes the loop a no-op for an empty stack.
for (i in seq_len(n_images)) {
  img <- jpeg_array[, , , i]
  # NOTE(review): resizeImage()'s `width` argument is documented as mapping to
  # image rows and `height` to image columns - confirm against the installed
  # OpenImageR version.
  resized_img <- OpenImageR::resizeImage(img, output_size[1], output_size[2],
                                         normalize_pixels = TRUE)
  resized_array[, , , i] <- resized_img
}
# resized image array dimensions
dim(resized_array)
## [1] 60 125 3 17
Let’s plot these resized images to ensure they are correct
# Pull the first down-sampled picture out of the resized stack.
img_resized <- resized_array[, , , 1]
# Plot extents in pixel units: columns along x, rows along y.
resized_cols <- ncol(img_resized)
resized_rows <- nrow(img_resized)
# Empty canvas with no axes, labels or box, sized to the resized image.
plot(
  0,
  type = "n",
  xlim = c(0, resized_cols),
  ylim = c(0, resized_rows),
  xaxt = "n",
  yaxt = "n",
  xlab = "",
  ylab = "",
  bty = "n"
)
# Paint the resized picture so that it fills the whole canvas.
graphics::rasterImage(img_resized, 0, 0, resized_cols, resized_rows)
OK, it is pixelated at 1/20 of its original size, but more workable.
Each jpeg needs to be vectorized into a column. Let’s convert the
array into a dataframe, then use the tidyr function
pivot_wider()
with the image number as the column headers.
Scale, get mean and standard deviation for future use.
# Standardize every column of `df`: subtract the column mean and divide by the
# column standard deviation. With center = FALSE and scale = FALSE, scale()
# leaves the data untouched and records no attributes, so the mean and standard
# deviation below would both be NULL.
# NOTE(review): `df` is built outside this section; presumably one column per
# image - confirm.
scaled_shoes <- scale(df, center = TRUE, scale = TRUE)
# Per-column means and standard deviations recorded by scale(), kept so the
# standardization can be undone later.
mean_shoe <- attr(scaled_shoes, "scaled:center")
std_shoe <- attr(scaled_shoes, "scaled:scale")
We need to do this in order to get the covariance matrix of the standardized images, which is the same as the correlation matrix computed below.
# Pairwise Pearson correlation between the columns of the scaled data.
cor_scaled <- stats::cor(x = scaled_shoes, method = "pearson")
Generate the eigencomponents' cumulative share of variance by calculating the cumulative sum of the eigenvalues, then dividing by the sum of all eigenvalues.
# Eigen-decomposition of the correlation matrix of the scaled data.
myeigen <- eigen(cor_scaled)
# Keep the two components of the decomposition under their own names.
eigenvectors <- myeigen[["vectors"]]
eigenvalues <- myeigen[["values"]]
# Cumulative share of the eigenvalue total covered by the first k components.
total_variance <- sum(eigenvalues)
cumsum(eigenvalues) / total_variance
## [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
## [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000
This tells us we exceed 80% of the variability (83.5% cumulatively) by using just the first three eigencomponents.
Let’s plot the top 3 images to visualize 83% variability of shoes.
# Diagonal rescaling matrix for the first three components: each entry is
# 1 / sqrt(eigenvalue), further divided by sqrt(number of rows - 1).
n_rows_scaled <- nrow(scaled_shoes)
scaling_shoes <- diag(eigenvalues[1:3]^(-1/2)) / sqrt(n_rows_scaled - 1)
# Project the scaled data onto the first three eigenvectors, then rescale.
top_eigenvectors <- eigenvectors[, 1:3]
eigenshoes <- scaled_shoes %*% top_eigenvectors %*% scaling_shoes
# Fold each projected column back into the shape of a single resized image.
single_image_dim <- dim(resized_array[, , , 1])
eigenimage_1 <- array(eigenshoes[, 1], single_image_dim)
eigenimage_2 <- array(eigenshoes[, 2], single_image_dim)
eigenimage_3 <- array(eigenshoes[, 3], single_image_dim)
Display top 3 images
Eigenshoe 1
# Show the first eigenimage with OpenImageR::imageShow().
OpenImageR::imageShow(eigenimage_1)
Eigenshoe 2
# Show the second eigenimage with OpenImageR::imageShow().
OpenImageR::imageShow(eigenimage_2)
Eigenshoe 3
# Show the third eigenimage with OpenImageR::imageShow().
OpenImageR::imageShow(eigenimage_3)