Data 605 hw 4

Background

With the data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.

Note: the images take up so much memory that the computer is only able to output three images instead of all 13.

Retrieving the JPG files, then loading them into a matrix and then into a data frame:

# Locate every JPEG in the image directory. Full paths are returned so the
# files can be read regardless of the current working directory.
path <- "C:/Users/bobbt/Downloads/jpg"
img_list <- list.files(path, pattern = "\\.jpg$", full.names = TRUE)

# Fail early with a clear message; otherwise `images[[1]]` below stops with
# an unhelpful "subscript out of bounds" when the directory is missing/empty.
if (length(img_list) == 0) {
  stop("No .jpg files found in '", path, "'.", call. = FALSE)
}

# Decode each file into a numeric array. The explicit `jpeg::` prefix means
# the call works whether or not `library(jpeg)` has been attached.
images <- lapply(img_list, jpeg::readJPEG)
img_dims <- dim(images[[1]])

# Every image must have the dimensions of the first one: each image becomes
# one column of the data matrix and is later reshaped using `img_dims`.
same_dims <- vapply(
  images,
  function(img) identical(dim(img), img_dims),
  logical(1)
)
if (!all(same_dims)) {
  stop(
    "All images must share the same dimensions; mismatch in: ",
    paste(basename(img_list[!same_dims]), collapse = ", "),
    call. = FALSE
  )
}

# reshape the images into a matrix and then into a data frame:
# each image is flattened to a single column. Flattening explicitly (rather
# than relying on cbind()) also keeps one column per image when the arrays
# are plain 2-D matrices, which cbind() would bind side by side instead.
image_df <- vapply(images, as.vector, numeric(length(images[[1]]))) |>
  as.data.frame()

# write the df to a file because of memory issues:
saveRDS(image_df, file = "image_df.RDS")

# also write img_dims:
saveRDS(img_dims, "img_dims")

# clear workspace (everything needed later is re-read from disk):
rm(list = ls(all.names = TRUE))

# clear unused memory:
gc()
##           used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells 2014941 107.7    4158198  222.1   2401795  128.3
## Vcells 3463292  26.5  436335526 3329.0 471508097 3597.4

Read the previously saved data frame containing the image data, scale the data to center and normalize it, save the scaled data to a new file to alleviate memory concerns, and then clear the workspace and any unused memory.

# Load the raw pixel data, standardise every column (subtract its mean and
# divide by its standard deviation), and write the result straight back to
# disk so that no large intermediate object is kept in the workspace.
readRDS("image_df.RDS") |>
  scale(center = TRUE, scale = TRUE) |>
  saveRDS(file = "scaled_images.RDS")

# Remove every object, hidden ones included:
rm(list = ls(all.names = TRUE))

# Trigger garbage collection to release the memory that is no longer used:
gc()
##           used  (Mb) gc trigger   (Mb)  max used   (Mb)
## Ncells 2017180 107.8    4158198  222.1   2401795  128.3
## Vcells 3468633  26.5  592524552 4520.7 736968791 5622.7
# Load the standardised pixel matrix written by the previous step.
scaled_images <- readRDS("scaled_images.RDS")

# Covariance between the columns of the scaled data.
sigma <- cov(scaled_images)

# Eigen-decomposition of the covariance matrix; keep both components
# under their own names for the steps that follow.
eig <- eigen(sigma)
eigenvectors <- eig[["vectors"]]
eigenvalues <- eig[["values"]]

# Running share of the total variance explained by the first k components.
cum_var <- cumsum(eigenvalues) / sum(eigenvalues)
print(cum_var)
##  [1] 0.6833138 0.7824740 0.8353528 0.8629270 0.8825040 0.8996099 0.9144723
##  [8] 0.9271856 0.9374462 0.9472860 0.9561859 0.9647964 0.9732571 0.9804242
## [15] 0.9874038 0.9941511 1.0000000

Making sure that the variability accounted for is at least 80%:

# Number of leading components needed for the cumulative share of variance
# to reach 80%. `>=` (not `>`) implements the "at least 80%" requirement, so
# a component that lands exactly on the target is accepted.
threshold <- min(which(cum_var >= 0.80))
threshold
## [1] 3

Computing the eigenfaces:

# Diagonal matrix that rescales each projected component by
# 1 / sqrt(eigenvalue * (n - 1)).
# `nrow = threshold` is essential: given a single number and no other
# argument, diag() builds an identity matrix of that size instead of a 1 x 1
# matrix holding the value, which would break the product below whenever
# only one component is retained.
scaling <- diag(eigenvalues[seq_len(threshold)]^(-1/2), nrow = threshold) /
  sqrt(nrow(scaled_images) - 1)

# Project the scaled pixel data onto the retained eigenvectors and rescale.
# `drop = FALSE` keeps the eigenvector selection a matrix even when a single
# column is selected.
eigenfaces <- scaled_images %*%
  eigenvectors[, seq_len(threshold), drop = FALSE] %*%
  scaling

# read in `img_dims` (saved when the images were first loaded):
img_dims <- readRDS("img_dims")

Displaying 3 of the 13 images due to memory issues

## Display the leading eigenimages (at most three) in a rainbow colour scale.
n_show <- min(3, ncol(eigenfaces))

# seq_len() gives an empty sequence when there is nothing to show, whereas
# 1:0 would iterate over c(1, 0) and fail on the column lookup.
for (i in seq_len(n_show)) {

  # Restore the i-th eigenface column to the original image dimensions.
  eigenimage <- array(eigenfaces[, i], img_dims)

  # Collapse the third dimension to a single layer by averaging.
  # rowMeans(dims = 2) is the vectorised equivalent of
  # apply(x, c(1, 2), mean); a 2-D image already is a single layer.
  eigenimage_gray <- if (length(img_dims) > 2) {
    rowMeans(eigenimage, dims = 2)
  } else {
    eigenimage
  }

  # image() maps matrix rows to the x axis with column 1 at the bottom, i.e.
  # it shows a matrix rotated 90 degrees counter-clockwise. Reversing the
  # rows and transposing makes the picture appear upright.
  upright <- t(eigenimage_gray[nrow(eigenimage_gray):1, , drop = FALSE])
  image(upright, col = rainbow(256))

  # Brief pause so each plot is visible before the next one replaces it.
  Sys.sleep(1)
}

### source used to complete the project

https://rpubs.com/mrcuny/eigen-imagery