With the data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.
Note: the images are so large that they exhaust the available memory, so the computer is only able to output three images instead of all 13.
# Load every JPEG in the image folder, flatten each one into a column of pixel
# values, and cache the result on disk so the workspace can be emptied.
path <- "C:/Users/bobbt/Downloads/jpg"
img_list <- list.files(path, pattern = "\\.jpg$", full.names = TRUE)
if (length(img_list) == 0) {
  stop("No .jpg files found in ", path, call. = FALSE)
}
# namespaced call so the step does not depend on library(jpeg) being attached:
images <- lapply(img_list, jpeg::readJPEG)
img_dims <- dim(images[[1]])
# every image must have the same dimensions, otherwise the pixel vectors
# cannot be stacked as columns of one matrix:
same_dims <- vapply(images, \(img) identical(dim(img), img_dims), logical(1))
if (!all(same_dims)) {
  stop(
    "Images differ in dimensions: ",
    paste(basename(img_list[!same_dims]), collapse = ", "),
    call. = FALSE
  )
}
# reshape the images into a matrix (one column per image) and then into a
# data frame. Flattening explicitly with as.vector() also handles grayscale
# images, which readJPEG() returns as 2-D matrices that cbind() would have
# bound side by side instead of one column per image:
image_df <- vapply(images, as.vector, numeric(prod(img_dims))) |>
  as.data.frame()
# write the df to a file because of memory issues:
saveRDS(image_df, file = "image_df.RDS")
# also write img_dims (needed later to restore the image shape):
saveRDS(img_dims, "img_dims")
# clear workspace (deliberate: everything needed later is reloaded from disk):
rm(list = ls(all.names = TRUE))
# clear unused memory:
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2014941 107.7 4158198 222.1 2401795 128.3
## Vcells 3463292 26.5 436335526 3329.0 471508097 3597.4
# Reload the cached pixel table, centre and scale every column, and store the
# standardised matrix on disk so it can be picked up after the workspace wipe.
scaled_images <- readRDS("image_df.RDS") |>
  scale(center = TRUE, scale = TRUE)
saveRDS(scaled_images, file = "scaled_images.RDS")
# empty the workspace, then hand the freed memory back:
rm(list = ls(all.names = TRUE))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2017180 107.8 4158198 222.1 2401795 128.3
## Vcells 3468633 26.5 592524552 4520.7 736968791 5622.7
# Reload the standardised pixel matrix written by the previous step.
scaled_images <- readRDS(file = "scaled_images.RDS")

# Covariance between the columns of the standardised matrix.
sigma <- cov(scaled_images)

# Eigen-decomposition of the covariance matrix; keep both parts for later use.
eig <- eigen(sigma)
eigenvectors <- eig[["vectors"]]
eigenvalues <- eig[["values"]]

# Running share of the total variance explained by the leading components.
cum_var <- cumsum(eigenvalues) / sum(eigenvalues)
print(cum_var)
## [1] 0.6833138 0.7824740 0.8353528 0.8629270 0.8825040 0.8996099 0.9144723
## [8] 0.9271856 0.9374462 0.9472860 0.9561859 0.9647964 0.9732571 0.9804242
## [15] 0.9874038 0.9941511 1.0000000
# smallest number of leading components whose cumulative share of the variance
# reaches at least 80%:
threshold <- which(cum_var >= 0.80)[1]
threshold
## [1] 3
# Project the scaled images onto the retained eigenvectors and rescale each
# projection to unit length: for an eigenpair (lambda, v) of cov(X), the
# vector X v has squared norm (n - 1) * lambda.
keep <- seq_len(threshold)
# `nrow = threshold` is required: with a single retained component, diag() of
# a length-one vector would build an identity matrix of that *size* instead of
# a 1 x 1 diagonal matrix.
scaling <- diag(eigenvalues[keep]^(-1/2), nrow = threshold) /
  sqrt(nrow(scaled_images) - 1)
# drop = FALSE keeps the eigenvector selection a matrix even for one column:
eigenfaces <- scaled_images %*% eigenvectors[, keep, drop = FALSE] %*% scaling
# read in `img_dims`:
img_dims <- readRDS("img_dims")
Displaying 3 of the 13 images due to memory issues
## go through the retained eigenimages (at most three) and display each one
## on a rainbow colour scale
n_show <- min(3, ncol(eigenfaces))
for (i in seq_len(n_show)) {
  # restore the original image shape from the flattened column:
  eigenimage <- array(eigenfaces[, i], img_dims)
  # average over the colour channels; rowMeans(dims = 2) does this in one
  # vectorised pass instead of calling mean() once per pixel. An image that
  # is already 2-D has no channels to average and is used as-is.
  if (length(img_dims) > 2) {
    eigenimage_gray <- rowMeans(eigenimage, dims = 2)
  } else {
    eigenimage_gray <- eigenimage
  }
  # image() puts matrix rows along the x axis with column 1 at the bottom
  # (a 90 degree counter-clockwise rotation), so transpose and reverse the
  # row order to draw the picture upright:
  row_order <- rev(seq_len(nrow(eigenimage_gray)))
  upright <- t(eigenimage_gray)[, row_order, drop = FALSE]
  # NOTE(review): for very large images, image(..., useRaster = TRUE) may
  # render much faster on graphics devices that support raster images.
  image(upright, col = rainbow(256))
  Sys.sleep(1)
}
### source used to complete the project