605: eigenshoes

Jie Zou

2021-09-19

load libraries

library(jpeg)
library(EBImage)
library(OpenImageR)
## 
## Attaching package: 'OpenImageR'
## The following objects are masked from 'package:EBImage':
## 
##     readImage, writeImage

get jpeg files

# list the JPEG files in the image directory
# (the pattern is anchored with `$` so that only names ending in ".jpg" match;
# an unanchored "\\.jpg" would also pick up names such as "shoe.jpg.bak")
files <- list.files("./jpg", pattern = "\\.jpg$")

# the number of image files found
file_num <- length(files)

load data into an array

# target height of each scaled image (1200 / 20 = 60)
height <- 1200 / 20

# target width of each scaled image (2500 / 20 = 125)
width <- 2500 / 20

# RGB: three colour channels
RGB <- 3

# fail early with a clear message instead of trying to read "./jpg/NA"
if (file_num == 0L) {
  stop("no .jpg files found in ./jpg", call. = FALSE)
}

# zero-filled array that will hold every scaled image;
# its dimensions are (file_num x height x width x RGB)
image_arr <- array(0, dim = c(file_num, height, width, RGB))

# read each file, resize it and store its pixels in slice i of the array
# NOTE(review): `resize` is called with (height, width) in that order; the
# resulting slices are height x width x RGB, as the dim() check below shows --
# confirm this argument order against the resize() implementation in use.
for (i in seq_len(file_num)) {
  tem <- resize(readJPEG(file.path("./jpg", files[i])), height, width)
  image_arr[i, , , ] <- array(tem, dim = c(1, height, width, RGB))
}

# double check the dimensions of this array
dim(image_arr)
## [1]  17  60 125   3

vectorize

# number of values in a single image: height x width x colour channels
pixels_per_image <- prod(dim(image_arr)[-1])

# one row per image, one column per pixel value.
# The column count must be the per-image size: sizing it with
# prod(dim(image_arr)) also multiplies in the number of images, and R then
# silently recycles every image file_num times to fill the row.
# Reshaping the 4-d array keeps the first index (the image) as the row, and
# lays each image out in column-major order: the whole red plane, then green,
# then blue -- the same order as c(r, g, b).
flatten <- array(image_arr, dim = c(file_num, pixels_per_image))

# transpose so that every image becomes one column (variable) of the data frame
shoes <- as.data.frame(t(flatten))

dim(shoes)
## [1] 22500    17
str(shoes)
## 'data.frame':    22500 obs. of  17 variables:
##  $ V1 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V2 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V3 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V4 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V5 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V6 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V7 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V8 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V9 : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V10: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V11: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V12: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V13: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V14: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V15: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V16: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ V17: num  1 1 1 1 1 1 1 1 1 1 ...

plot the original images

# lay the images out on a 3 x 3 grid per page
par(mfrow = c(3, 3))
par(mai = c(.4, .4, .4, .4)) # margin sizes

# draw every scaled image
for (i in seq_len(file_num)) {
  # encode slice i as JPEG in memory and read it back as a native raster
  jpg <- readJPEG(writeJPEG(image_arr[i, , , ]), native = TRUE)

  # dim(jpg) is (rows, columns); reverse it to get (width, height)
  width_height <- dim(jpg)[2:1]

  # create a plot area whose axes span the image size
  plot(1, 1, xlim = c(1, width_height[1]), ylim = c(1, width_height[2]),
       xlab = "", ylab = "")

  # draw the raster image so that it fills the plot area
  rasterImage(jpg, 1, 1, width_height[1], width_height[2])
}

calculate correlation

# standardise every column (one column per image): subtract the column mean
# and divide by the column standard deviation
scaled_data <- scale(shoes, center = TRUE, scale = TRUE)

# correlation matrix between the image columns
sig <- cor(scaled_data)

get eigencomponents

# eigen-decomposition of the correlation matrix (eigenvalues and eigenvectors)
eig <- eigen(sig)

# print the eigenvalues
eig[["values"]]
##  [1] 11.77794388  1.72081925  0.86806022  0.46371734  0.32298978  0.27624335
##  [7]  0.23790552  0.20503228  0.16487012  0.15399147  0.14378933  0.13577975
## [13]  0.12975748  0.11385581  0.10510016  0.10124270  0.07890155

eigenshoes

# cumulative share of the total variability covered by the leading eigenvalues
with(eig, cumsum(values) / sum(values))
##  [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
##  [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
# The cumulative share crosses 80% between the 2nd and 3rd components; the
# first two components are kept because their share is the closest to 80%.
# Diagonal scaling matrix: eigenvalue^(-1/2), divided by sqrt(n - 1).
top_values <- eig$values[1:2]
degrees_of_freedom <- nrow(scaled_data) - 1
var_80 <- diag(top_values ^ (-1/2)) / sqrt(degrees_of_freedom)

var_80
##              [,1]        [,2]
## [1,] 0.0004711401 0.000000000
## [2,] 0.0000000000 0.001232586
# eigenshoes: project the standardised data onto the first two eigenvectors,
# then apply the diagonal scaling matrix
top_vectors <- eig$vectors[, 1:2]
eigenshoes <- scaled_data %*% top_vectors %*% var_80

# display each eigenshoe as a height x width x RGB image
for (i in seq_len(2)) {
  plot(1, 1, xlim = c(1, width_height[1]), ylim = c(1, width_height[2]), xlab = '', ylab = '')
  shoe_pixels <- array(eigenshoes[, i], c(height, width, RGB))
  imageShow(shoe_pixels)
}