Homework 4

Getting started: Load the libraries

# Set working directory 
setwd("/Users/heleinefouda/Desktop/jpg")
image_directory <- "/Users/heleinefouda/Desktop/jpg"

Import the image files et define parameters

path_files <-"/Users/heleinefouda/Desktop/jpg"

# List JPEG files in the directory:
jpeg_files <- list.files(image_directory, pattern = "\\.jpg$", full.names = TRUE)

# Define the paramaters: width, height and scale
 height <- 1200
 scale <- 20
 width <- 2500
 # Set the scaling factor
scale <- 20

# Calculate new dimensions after scaling
new_height <- height / scale
new_width <- width / scale

Define the functions to be used

# Function to ...
height=1200; width=2500;scale=20
plot_jpeg = function(path, add=FALSE)
{ jpg = readJPEG(path, native=T) # read the file
  res = dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) # initialize an empty plot area if add==FALSE
    plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]),asp=1,type='n',xaxs='i',yaxs='i',xaxt='n',yaxt='n',xlab='',ylab='',bty='n')
  rasterImage(jpg,1,1,res[1],res[2])
}

Read images into a list:

# Read images into a list:
image_list <- lapply(jpeg_files, jpeg::readJPEG)

Check number of files

# number of files
num <- length(jpeg_files )
num

## [1] 17

Read and plot the third shoe first, as a test.

# Read third shoe
img <- jpeg::readJPEG(paste0( path_files,"/RC_2500x1200_2014_us_53469.jpg"))
dim(img)

## [1] 1200 2500    3

Find the dimension of the matrix

filenames <- list.files(path = path_files, pattern = ".jpg")
# matrix where all the files will be stored
images_data <- matrix(0, length(filenames), prod(dim(img)))
# Show the dimensions of the matrix
dim(images_data)

## [1]      17 9000000

code gives us a 17 * 9MM matrix, which is equals to 17 shoes, each having dimensions 1200 * 2500 * 3

# Save the row,col,channels dimension
row_dim <- dim(img)[1]
col_dim <- dim(img)[2]
channel_dim <- dim(img)[3]

Read in all the images contained in the files

# Combine images into a single matrix
image_matrix <- abind(image_list, along = 3)

Load all images into a single vector

# Initialize images_data before the loop
images_data <- matrix(0, nrow = length(filenames), ncol = 2500 * 1200 * 3)  # Replace width and height

counter <- 1
for (filename in filenames) {
  print(paste("loading file: ", filename))
  
  # Read the image
  img <- jpeg::readJPEG(file.path("/Users/heleinefouda/Desktop/jpg", filename))
  
  # Extract red, green, and blue channels
  red <- as.vector(img[, , 1])
  green <- as.vector(img[, , 2])
  blue <- as.vector(img[, , 3])
  
  # Concatenate channels into a single vector and store in images_data
  images_data[counter,] <- c(red, green, blue)
  
  counter <- counter + 1
}

## [1] "loading file:  RC_2500x1200_2014_us_53446.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53455.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53469.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53626.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53632.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53649.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53655.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53663.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_53697.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54018.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54067.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54106.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54130.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54148.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54157.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54165.jpg"
## [1] "loading file:  RC_2500x1200_2014_us_54172.jpg"

Plot third image(test)

library(imager)

# Plot third shoe


# Convert image to imager format
img_imager <- imager::as.cimg(img)

## Warning in as.cimg.array(img): Assuming third dimension corresponds to colour

# Plot the image using base R plotting functions
plot(as.raster(img_imager), main = "RC_2500x1200_2014_us_53469.jpg")

# Plot the original and transposed images side by side
par(mfrow = c(1, 2))
plot(as.raster(img), main = "Original Image")

Applying transposition

images_data  <- t(images_data )
dim(images_data )

## [1] 9000000      17

The transposed version gives us a 9 MM * 17

Load the Data into an Array and Vectorize

 # Create an array to store resized images
array_a <- array(rep(0, num * new_height * new_width * 3), dim = c(num, new_height, new_width, 3))

# Resize and store each image in the array
for (i in 1:num) {
    temp <- resizeImage(readJPEG(jpeg_files[i]), new_height, new_width)
    array_a[i, , , ] <- array(temp, dim = c(1, new_height, new_width, 3))
}

Flatten the array and store pixel values in a data frame

 # Flatten the array and store pixel values in a data frame
flat <- matrix(0, num, prod(dim(array_a)))

for (i in 1:num) {
    r <- as.vector(array_a[i, , , 1])
    g <- as.vector(array_a[i, , , 2])
    b <- as.vector(array_a[i, , , 3])
    flat[i, ] <- t(c(r, g, b))
}

# Convert the flattened matrix to a data frame
shoes <- as.data.frame(t(flat))

Create a Function to Plot the Images

# Define a function to plot JPEG images
plot_jpeg <- function(path, add = FALSE) {
    jpg <- readJPEG(path, native = TRUE)
    res <- dim(jpg)[2:1]
    if (!add) {
        plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
            xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n", xlab = "", ylab = "",
            bty = "n")
    }
    rasterImage(jpg, 1, 1, res[1], res[2])
}

# Plot resized images
par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))
for (i in 1:num) {
    plot_jpeg(writeJPEG(array_a[i, , , ]))
}

Standardize the Pixel Values

# Standardize the pixel values
scaled <- scale(shoes, center = TRUE, scale = TRUE)

# Extract mean and standard deviation of standardized pixel values
mean.shoe <- attr(scaled, "scaled:center")
std.shoe <- attr(scaled, "scaled:scale")

Calculate the Correlation Matrix

# Calculate the correlation matrix
Sigma <- cor(scaled)
sigma

## function (object, ...) 
## UseMethod("sigma")
## <bytecode: 0x10f268390>
## <environment: namespace:stats>

# Compute eigenvalues of the correlation matrix

# Compute eigenvalues of the correlation matrix
myeigen <- eigen(Sigma)
eigenvalues <- myeigen$values
eigenvalues

##  [1] 11.61745316  1.70394885  0.87429472  0.47497591  0.33549455  0.28761630
##  [7]  0.24989310  0.21531927  0.17872717  0.16921863  0.15382298  0.14492412
## [13]  0.14248337  0.12123246  0.11947244  0.11496463  0.09615834

Extract eigenvectors of the correlation matrix

# Extract eigenvectors of the correlation matrix
eigenvectors <- myeigen$vectors

# Display the first few rows of eigenvectors in a nice table format
knitr::kable(head(eigenvectors), format = "markdown")

-0.2525635	-0.0575731	-0.1386619	0.3612579	0.3367958	0.0771434	0.5101649	0.4590323	0.1004874	-0.0066637	0.0399923	0.2091592	0.0802011	-0.0279929	0.1473921	-0.3173218	0.0876285
-0.2568621	0.2285982	-0.0981094	0.2282754	-0.0291860	0.1245893	0.1998114	0.1230211	-0.2442517	0.0694919	-0.0229789	-0.2066317	0.2766962	0.0141970	-0.1019089	0.7341851	-0.1212779
-0.1969535	-0.3465766	-0.2315417	0.6676894	-0.1570505	0.2328825	-0.4447300	-0.2179298	0.0839843	-0.0133999	-0.0233552	-0.0477726	-0.0002179	0.0037787	-0.0300553	-0.0703296	0.0396419
-0.2402368	0.3049035	-0.1377157	-0.1120160	-0.3214305	0.3240779	0.0642133	-0.1110570	-0.0661180	-0.0363408	0.3765760	0.4259917	-0.2934239	-0.2116585	-0.1245587	0.0376213	0.3408931
-0.2538292	0.2390470	-0.0601217	-0.0198136	0.3055220	-0.1282683	-0.2885175	0.0737153	0.1689848	0.2884454	0.2680998	-0.1259732	-0.3649987	0.4875641	0.2938786	0.1177442	0.0726146
-0.2082231	-0.3477848	-0.4348510	-0.3317499	0.0020995	-0.1940209	0.0210512	0.0688897	0.1548276	-0.2519525	-0.2612493	-0.1494615	-0.0409279	-0.0791256	0.1059181	0.2341891	0.4915866

Processing matrix with a singular Value Decomposition (SVD)

scaled_data <- scale(images_data)
scaled_data [is.nan(scaled_data)] = 0
svd_decomp <- svd(scaled_data)

Computing variance

# Calculate cumulative variance explained by each eigenshoe
cumulative_variance <- cumsum(myeigen$values) / sum(myeigen$values)
cumulative_variance

##  [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
##  [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000

Plotting variance

plot(svd_decomp$d^2 / sum(svd_decomp$d^2), type = "b", ylab = "Prop. of Variance Explained", pch = 19)

# Read and plot a sample JPEG image

testing_img <- readJPEG(jpeg_files[4])
plot(1:2, type = "n", main = "")
rasterImage(testing_img, 1, 1, 2, 2)

Determine the number of principal components explaining at least 80% of the variance

# Determine the number of principal components explaining at least 80% of the variance
num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp

## [1] 3

Plot cumulative variance explained by eigenshoes

# Calculate cumulative variance explained by each eigenshoe
cumulative_variance <- cumsum(myeigen$values) / sum(myeigen$values)
cumulative_variance

##  [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
##  [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000

# Plot cumulative variance explained by eigenshoes
ggplot(as.data.frame(cumulative_variance), aes(x = 1:num, cumulative_variance)) +
    geom_line() + geom_point() + labs(x = "Number of eigenshoes", y = "Cumulative Variance") +
    scale_x_continuous(breaks = seq(1, 17, by = 2)) + theme_minimal()

## Determine the number of principal components explaining at least 80% of the variance

# Determine the number of principal components explaining at least 80% of the variance
num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp

## [1] 3

Compare different levels of variance

### 80%

var_pct <- 0.8

vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))

## [1] "Vectors to use: 3"

70%

var_pct <- 0.7
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))

## [1] "Vectors to use: 2"

90%

var_pct <- 0.9
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))

## [1] "Vectors to use: 7"

95%

var_pct <- 0.95
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))

## [1] "Vectors to use: 11"

99%

var_pct <- 0.99
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))

## [1] "Vectors to use: 16"

Homework 4

Heleine Fouda

2024-02-17

Getting started: Load the libraries

Import the image files et define parameters

Define the functions to be used

Read images into a list:

Check number of files

Read and plot the third shoe first, as a test.

Find the dimension of the matrix

code gives us a 17 * 9MM matrix, which is equals to 17 shoes, each having dimensions 1200 * 2500 * 3

Read in all the images contained in the files

Load all images into a single vector

Plot third image(test)

Applying transposition

The transposed version gives us a 9 MM * 17

Load the Data into an Array and Vectorize

Flatten the array and store pixel values in a data frame

Create a Function to Plot the Images

Standardize the Pixel Values

Calculate the Correlation Matrix

# Compute eigenvalues of the correlation matrix

Extract eigenvectors of the correlation matrix

Processing matrix with a singular Value Decomposition (SVD)

Computing variance

Plotting variance

Determine the number of principal components explaining at least 80% of the variance

Plot cumulative variance explained by eigenshoes

Compare different levels of variance

70%

90%

95%

99%