# Set working directory
setwd("/Users/heleinefouda/Desktop/jpg")
image_directory <- "/Users/heleinefouda/Desktop/jpg"
path_files <-"/Users/heleinefouda/Desktop/jpg"
# List JPEG files in the directory:
jpeg_files <- list.files(image_directory, pattern = "\\.jpg$", full.names = TRUE)
# Define the paramaters: width, height and scale
height <- 1200
scale <- 20
width <- 2500
# Set the scaling factor
scale <- 20
# Calculate new dimensions after scaling
new_height <- height / scale
new_width <- width / scale
# Function to ...
height=1200; width=2500;scale=20
plot_jpeg = function(path, add=FALSE)
{ jpg = readJPEG(path, native=T) # read the file
res = dim(jpg)[2:1] # get the resolution, [x, y]
if (!add) # initialize an empty plot area if add==FALSE
plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]),asp=1,type='n',xaxs='i',yaxs='i',xaxt='n',yaxt='n',xlab='',ylab='',bty='n')
rasterImage(jpg,1,1,res[1],res[2])
}
# Read images into a list:
image_list <- lapply(jpeg_files, jpeg::readJPEG)
# number of files
num <- length(jpeg_files )
num
## [1] 17
# Read third shoe
img <- jpeg::readJPEG(paste0( path_files,"/RC_2500x1200_2014_us_53469.jpg"))
dim(img)
## [1] 1200 2500 3
filenames <- list.files(path = path_files, pattern = ".jpg")
# matrix where all the files will be stored
images_data <- matrix(0, length(filenames), prod(dim(img)))
# Show the dimensions of the matrix
dim(images_data)
## [1] 17 9000000
# Save the row,col,channels dimension
row_dim <- dim(img)[1]
col_dim <- dim(img)[2]
channel_dim <- dim(img)[3]
# Combine images into a single matrix
image_matrix <- abind(image_list, along = 3)
# Initialize images_data before the loop
images_data <- matrix(0, nrow = length(filenames), ncol = 2500 * 1200 * 3) # Replace width and height
counter <- 1
for (filename in filenames) {
print(paste("loading file: ", filename))
# Read the image
img <- jpeg::readJPEG(file.path("/Users/heleinefouda/Desktop/jpg", filename))
# Extract red, green, and blue channels
red <- as.vector(img[, , 1])
green <- as.vector(img[, , 2])
blue <- as.vector(img[, , 3])
# Concatenate channels into a single vector and store in images_data
images_data[counter,] <- c(red, green, blue)
counter <- counter + 1
}
## [1] "loading file: RC_2500x1200_2014_us_53446.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53455.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53469.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53626.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53632.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53649.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53655.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53663.jpg"
## [1] "loading file: RC_2500x1200_2014_us_53697.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54018.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54067.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54106.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54130.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54148.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54157.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54165.jpg"
## [1] "loading file: RC_2500x1200_2014_us_54172.jpg"
library(imager)
# Plot third shoe
# Convert image to imager format
img_imager <- imager::as.cimg(img)
## Warning in as.cimg.array(img): Assuming third dimension corresponds to colour
# Plot the image using base R plotting functions
plot(as.raster(img_imager), main = "RC_2500x1200_2014_us_53469.jpg")
# Plot the original and transposed images side by side
par(mfrow = c(1, 2))
plot(as.raster(img), main = "Original Image")
images_data <- t(images_data )
dim(images_data )
## [1] 9000000 17
# Create an array to store resized images
array_a <- array(rep(0, num * new_height * new_width * 3), dim = c(num, new_height, new_width, 3))
# Resize and store each image in the array
for (i in 1:num) {
temp <- resizeImage(readJPEG(jpeg_files[i]), new_height, new_width)
array_a[i, , , ] <- array(temp, dim = c(1, new_height, new_width, 3))
}
# Flatten the array and store pixel values in a data frame
flat <- matrix(0, num, prod(dim(array_a)))
for (i in 1:num) {
r <- as.vector(array_a[i, , , 1])
g <- as.vector(array_a[i, , , 2])
b <- as.vector(array_a[i, , , 3])
flat[i, ] <- t(c(r, g, b))
}
# Convert the flattened matrix to a data frame
shoes <- as.data.frame(t(flat))
# Define a function to plot JPEG images
plot_jpeg <- function(path, add = FALSE) {
jpg <- readJPEG(path, native = TRUE)
res <- dim(jpg)[2:1]
if (!add) {
plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = "n",
xaxs = "i", yaxs = "i", xaxt = "n", yaxt = "n", xlab = "", ylab = "",
bty = "n")
}
rasterImage(jpg, 1, 1, res[1], res[2])
}
# Plot resized images
par(mfrow = c(3, 3))
par(mai = c(0.3, 0.3, 0.3, 0.3))
for (i in 1:num) {
plot_jpeg(writeJPEG(array_a[i, , , ]))
}
# Standardize the pixel values
scaled <- scale(shoes, center = TRUE, scale = TRUE)
# Extract mean and standard deviation of standardized pixel values
mean.shoe <- attr(scaled, "scaled:center")
std.shoe <- attr(scaled, "scaled:scale")
# Calculate the correlation matrix
Sigma <- cor(scaled)
sigma
## function (object, ...)
## UseMethod("sigma")
## <bytecode: 0x10f268390>
## <environment: namespace:stats>
# Compute eigenvalues of the correlation matrix
myeigen <- eigen(Sigma)
eigenvalues <- myeigen$values
eigenvalues
## [1] 11.61745316 1.70394885 0.87429472 0.47497591 0.33549455 0.28761630
## [7] 0.24989310 0.21531927 0.17872717 0.16921863 0.15382298 0.14492412
## [13] 0.14248337 0.12123246 0.11947244 0.11496463 0.09615834
# Extract eigenvectors of the correlation matrix
eigenvectors <- myeigen$vectors
# Display the first few rows of eigenvectors in a nice table format
knitr::kable(head(eigenvectors), format = "markdown")
| -0.2525635 | -0.0575731 | -0.1386619 | 0.3612579 | 0.3367958 | 0.0771434 | 0.5101649 | 0.4590323 | 0.1004874 | -0.0066637 | 0.0399923 | 0.2091592 | 0.0802011 | -0.0279929 | 0.1473921 | -0.3173218 | 0.0876285 |
| -0.2568621 | 0.2285982 | -0.0981094 | 0.2282754 | -0.0291860 | 0.1245893 | 0.1998114 | 0.1230211 | -0.2442517 | 0.0694919 | -0.0229789 | -0.2066317 | 0.2766962 | 0.0141970 | -0.1019089 | 0.7341851 | -0.1212779 |
| -0.1969535 | -0.3465766 | -0.2315417 | 0.6676894 | -0.1570505 | 0.2328825 | -0.4447300 | -0.2179298 | 0.0839843 | -0.0133999 | -0.0233552 | -0.0477726 | -0.0002179 | 0.0037787 | -0.0300553 | -0.0703296 | 0.0396419 |
| -0.2402368 | 0.3049035 | -0.1377157 | -0.1120160 | -0.3214305 | 0.3240779 | 0.0642133 | -0.1110570 | -0.0661180 | -0.0363408 | 0.3765760 | 0.4259917 | -0.2934239 | -0.2116585 | -0.1245587 | 0.0376213 | 0.3408931 |
| -0.2538292 | 0.2390470 | -0.0601217 | -0.0198136 | 0.3055220 | -0.1282683 | -0.2885175 | 0.0737153 | 0.1689848 | 0.2884454 | 0.2680998 | -0.1259732 | -0.3649987 | 0.4875641 | 0.2938786 | 0.1177442 | 0.0726146 |
| -0.2082231 | -0.3477848 | -0.4348510 | -0.3317499 | 0.0020995 | -0.1940209 | 0.0210512 | 0.0688897 | 0.1548276 | -0.2519525 | -0.2612493 | -0.1494615 | -0.0409279 | -0.0791256 | 0.1059181 | 0.2341891 | 0.4915866 |
scaled_data <- scale(images_data)
scaled_data [is.nan(scaled_data)] = 0
svd_decomp <- svd(scaled_data)
# Calculate cumulative variance explained by each eigenshoe
cumulative_variance <- cumsum(myeigen$values) / sum(myeigen$values)
cumulative_variance
## [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
## [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000
plot(svd_decomp$d^2 / sum(svd_decomp$d^2), type = "b", ylab = "Prop. of Variance Explained", pch = 19)
# Read and plot a sample JPEG image
testing_img <- readJPEG(jpeg_files[4])
plot(1:2, type = "n", main = "")
rasterImage(testing_img, 1, 1, 2, 2)
# Determine the number of principal components explaining at least 80% of the variance
num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp
## [1] 3
# Calculate cumulative variance explained by each eigenshoe
cumulative_variance <- cumsum(myeigen$values) / sum(myeigen$values)
cumulative_variance
## [1] 0.6833796 0.7836119 0.8350410 0.8629807 0.8827157 0.8996343 0.9143339
## [8] 0.9269998 0.9375131 0.9474672 0.9565156 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000
# Plot cumulative variance explained by eigenshoes
ggplot(as.data.frame(cumulative_variance), aes(x = 1:num, cumulative_variance)) +
geom_line() + geom_point() + labs(x = "Number of eigenshoes", y = "Cumulative Variance") +
scale_x_continuous(breaks = seq(1, 17, by = 2)) + theme_minimal()
## Determine the number of principal components explaining at least 80%
of the variance
# Determine the number of principal components explaining at least 80% of the variance
num_comp <- which(cumulative_variance >= 0.8)[1]
num_comp
## [1] 3
### 80%
var_pct <- 0.8
vectors <- which(cumsum(svd_decomp$d^2 / sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 3"
var_pct <- 0.7
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 2"
var_pct <- 0.9
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 7"
var_pct <- 0.95
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 11"
var_pct <- 0.99
vectors <- which(cumsum(svd_decomp$d^2/sum(svd_decomp$d^2)) >= var_pct)[1]
print(paste0("Vectors to use: ", vectors))
## [1] "Vectors to use: 16"