library(jpeg)
library(OpenImageR)
library(graphics)
library(tidyverse)
library(reshape2)
library(imager)

Assignment

Build and visualize eigenimagery that accounts for 80% of the variability

Read data

Unzip the archive of JPEG files and read the images into R as a single array.

# Extract the archive into the working directory.
unzip("jpg.zip")

# Collect the JPEG paths. `pattern` is a regular expression (not a glob), and
# full.names = TRUE keeps the "jpg/" prefix so the files are read from the
# same folder they were listed from.
file_list <- list.files("jpg", pattern = "\\.jpg$", full.names = TRUE)
stopifnot(length(file_list) > 0)

# Read every file; each list element is the array returned by readJPEG().
jpeg_list <- lapply(file_list, readJPEG)

# Stacking with array(unlist(...)) is only valid when every image has the
# same dimensions, so verify that before combining.
img_dim <- dim(jpeg_list[[1]])
same_shape <- vapply(
  jpeg_list,
  function(im) identical(dim(im), img_dim),
  logical(1)
)
stopifnot(all(same_shape))

# Combine into one array whose last dimension indexes the image.
jpeg_array <- array(unlist(jpeg_list), dim = c(img_dim, length(jpeg_list)))

# print dimensions of array
print(dim(jpeg_array))
## [1] 1200 2500    3   17

We can see there are 17 pictures, each 1200 x 2500 pixels with 3 color channels. Now let’s plot these images to see what they look like.

Plot data

Let’s take a look at the original images.

# Pull the first picture out of the stacked array.
img <- jpeg_array[, , , 1]

# Plot extents: x spans the image columns, y spans the image rows.
x_extent <- ncol(img)
y_extent <- nrow(img)

# Open an empty canvas with no axes, labels, or box.
plot(
  0,
  type = "n",
  xlim = c(0, x_extent),
  ylim = c(0, y_extent),
  xaxt = "n",
  yaxt = "n",
  xlab = "",
  ylab = "",
  bty = "n"
)

# Paint the picture so that it fills the whole canvas.
graphics::rasterImage(
  img,
  xleft = 0,
  ybottom = 0,
  xright = x_extent,
  ytop = y_extent
)

Now let’s try and get this to loop through all images.

# Lay the plots out on a 3 x 3 grid with 0.3 inch margins, remembering the
# previous graphics settings so they can be restored afterwards.
old_par <- par(mfrow = c(3, 3), mai = c(.3, .3, .3, .3))

# Plot every image. The pictures are taken from jpeg_array, which already
# holds them in memory, instead of being read from disk a second time.
for (i in seq_len(dim(jpeg_array)[4])) {
  img <- jpeg_array[, , , i]

  # create blank plot sized to the image, without axes, labels, or box
  plot(0, type = "n",
       xlim = c(0, ncol(img)),
       ylim = c(0, nrow(img)),
       yaxt = "n", xaxt = "n",
       xlab = "", ylab = "", bty = "n")

  # Display the image on the plot
  graphics::rasterImage(img, 0, 0, ncol(img), nrow(img))
}

# Restore the layout and margins so later plots are not squeezed into the grid.
par(old_par)

Resize

I am going to have to resize the jpegs prior to vectorizing them to conserve memory.

# Shrink each of the first two image dimensions by this factor.
shrink_factor <- 20

# Desired output size, taken directly from the array dimensions. Integer
# division keeps the sizes whole even when a dimension is not an exact
# multiple of the factor.
output_size <- dim(jpeg_array)[1:2] %/% shrink_factor

# Create an empty array to store the resized images
resized_array <- array(NA_real_,
                       dim = c(output_size, dim(jpeg_array)[3:4]))

# Loop through each image in the array, resize it, and store it in the
# output array. The two size arguments are passed positionally, in the same
# order as the first two array dimensions.
for (i in seq_len(dim(jpeg_array)[4])) {
  img <- jpeg_array[, , , i]
  resized_img <- OpenImageR::resizeImage(img, output_size[1], output_size[2],
                                         normalize_pixels = TRUE)
  resized_array[, , , i] <- resized_img
}

# resized image array dimensions
dim(resized_array)
## [1]  60 125   3  17

Let’s plot these resized images to ensure they are correct

# Take the first resized picture.
img_resized <- resized_array[, , , 1]

# Plot extents: x spans the image columns, y spans the image rows.
resized_cols <- ncol(img_resized)
resized_rows <- nrow(img_resized)

# Open an empty canvas with no axes, labels, or box.
plot(
  0,
  type = "n",
  xlim = c(0, resized_cols),
  ylim = c(0, resized_rows),
  xaxt = "n",
  yaxt = "n",
  xlab = "",
  ylab = "",
  bty = "n"
)

# Paint the resized picture so that it fills the whole canvas.
graphics::rasterImage(
  img_resized,
  xleft = 0,
  ybottom = 0,
  xright = resized_cols,
  ytop = resized_rows
)

OK, the image is pixelated at 1/20th of its original size in each dimension, but it is much more workable.

Vectorize

Each jpeg needs to be vectorized into a column. Let’s convert the array into a data frame, then use the tidyr function pivot_wider() with the image number as the column headers.

Scale and get components

Scale, get mean and standard deviation for future use.

# Vectorize: flatten each resized image into one column, so the result has
# one row per pixel/channel value and one column per image.
# NOTE(review): no visible code builds `df`, so it is created here from
# resized_array; confirm this matches the intended vectorization.
df <- matrix(resized_array, ncol = dim(resized_array)[4])

# Center and scale every image column. Both options must be TRUE: with
# center = FALSE / scale = FALSE, scale() does not attach the
# "scaled:center" / "scaled:scale" attributes and the lookups below are NULL.
scaled_shoes <- scale(df, center = TRUE, scale = TRUE)

# Per-column means and standard deviations, kept for future use.
mean_shoe <- attr(scaled_shoes, "scaled:center")
std_shoe <- attr(scaled_shoes, "scaled:scale")

Calculate covariance

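We need the correlation matrix of the scaled images (for standardized data it equals the covariance matrix) so that we can compute its eigenvalues and eigenvectors in the next step.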

# Pearson correlation between the columns (one column per image) of the
# scaled data; the result is a square matrix with one row/column per image.
cor_scaled <- stats::cor(scaled_shoes, method = "pearson")

Get the Eigencomponents

Compute the eigenvalues and eigenvectors, then take the cumulative sum of the eigenvalues divided by their total. This gives the cumulative proportion of variability explained by the first k components.

# Eigendecomposition of the correlation matrix of the scaled data.
myeigen <- eigen(cor_scaled)

# Pull the two parts of the decomposition out by name.
eigenvalues <- myeigen[["values"]]
eigenvectors <- myeigen[["vectors"]]

# Running share of the eigenvalue total covered by the first k components.
cumsum(eigenvalues) / sum(eigenvalues)
##  [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
##  [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000

This tells us we exceed 80% of the variability using just the first three eigencomponents (about 83.5%).

Plot Eigenshoe

Let’s plot the top 3 eigenshoes, which together account for about 83.5% of the variability in the shoe images.

# Indices of the leading components that are kept.
top_components <- 1:3

# Diagonal rescaling matrix: eigenvalue^(-1/2) for each kept component,
# divided by sqrt(number of rows of the scaled data - 1).
scaling_shoes <- diag(eigenvalues[top_components]^(-1/2)) /
  (sqrt(nrow(scaled_shoes) - 1))

# Project the scaled data onto the kept eigenvectors, then rescale.
# The multiplication order is left to right, as written.
projected_shoes <- scaled_shoes %*% eigenvectors[, top_components]
eigenshoes <- projected_shoes %*% scaling_shoes

# Fold each eigenshoe column back into the shape of a single resized image.
image_shape <- dim(resized_array[, , , 1])
eigenimage_1 <- array(eigenshoes[, 1], image_shape)
eigenimage_2 <- array(eigenshoes[, 2], image_shape)
eigenimage_3 <- array(eigenshoes[, 3], image_shape)

Display top 3 images

Eigenshoe 1

# Display the first eigenshoe (the image array built from column 1 of
# eigenshoes) using OpenImageR's image viewer.
OpenImageR::imageShow(eigenimage_1)

Eigenshoe 2

# Display the second eigenshoe (the image array built from column 2 of
# eigenshoes) using OpenImageR's image viewer.
OpenImageR::imageShow(eigenimage_2)

Eigenshoe 3

# Display the third eigenshoe (the image array built from column 3 of
# eigenshoes) using OpenImageR's image viewer.
OpenImageR::imageShow(eigenimage_3)