load libraries
library(jpeg)
library(EBImage)
library(OpenImageR)
##
## Attaching package: 'OpenImageR'
## The following objects are masked from 'package:EBImage':
##
## readImage, writeImage
get jpeg files
# List the JPEG files in ./jpg. The pattern is anchored with `$` so only
# names that END in ".jpg" match; the unanchored "\\.jpg" would also pick up
# names such as "a.jpg.bak".
files <- list.files("./jpg", pattern = "\\.jpg$")
# Number of image files found in the directory.
file_num <- length(files)
load data into an array
# Height and width (in pixels) that every image is resized to.
# NOTE(review): presumably the source images are 1200 x 2500 and are being
# shrunk by a factor of 20 -- confirm against the files in ./jpg.
height <- 1200 / 20
width <- 2500 / 20
# Number of colour channels (red, green, blue).
RGB <- 3
# Zero-filled 4-D array that holds every resized image; its dimensions are
# (file_num x height x width x RGB).
image_arr <- array(0, dim = c(file_num, height, width, RGB))
# Read each JPEG, resize it, and store its pixels in slice i of the array.
# seq_len() (rather than seq()) makes the loop a no-op when no files were
# found; seq(0) would iterate over c(1, 0) and try to read "./jpg/NA".
for (i in seq_len(file_num)) {
  tem <- resize(readJPEG(file.path("./jpg", files[i])), height, width)
  image_arr[i, , , ] <- array(tem, dim = c(height, width, RGB))
}
# double-check the dimensions of this array
dim(image_arr)
## [1] 17 60 125 3
vectorize
# Number of values in one image: every pixel of every colour channel.
pixels_per_image <- height * width * RGB
# Zero matrix with one row per image. The column count must be the size of a
# single image; prod(dim(image_arr)) also multiplies in file_num, which made
# every row file_num times too long and silently recycled each image.
flatten <- matrix(data = 0, nrow = file_num, ncol = pixels_per_image)
# Flatten each image into one row. as.vector() on the (height, width, RGB)
# slice walks the channels in order, i.e. all red values, then green, then
# blue.
for (i in seq_len(file_num)) {
  flatten[i, ] <- as.vector(image_arr[i, , , ])
}
# Transpose so that each image becomes one column (variable).
# NOTE: recorded outputs further down that depend on nrow(shoes) (such as the
# printed var_80 scaling factors) were produced with the old 382500-row
# matrix and will change when the document is re-run.
shoes <- as.data.frame(t(flatten))
dim(shoes)
## [1] 22500 17
## 'data.frame': 22500 obs. of 17 variables:
## $ V1 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V2 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V3 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V4 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V5 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V6 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V7 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V8 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V9 : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V10: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V11: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V12: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V13: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V14: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V15: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V16: num 1 1 1 1 1 1 1 1 1 1 ...
## $ V17: num 1 1 1 1 1 1 1 1 1 1 ...
plot the original image
# Draw the images on a 3 x 3 grid of panels per page.
par(mfrow = c(3, 3))
# Margin of 0.4 inches on every side of each panel.
par(mai = c(.4, .4, .4, .4))
# Plot every stored image. seq_len() skips the loop entirely when there are
# no images; seq(0) would iterate over c(1, 0).
for (i in seq_len(file_num)) {
  # Encode the image as an in-memory JPEG and read it back as a native raster.
  jpg <- readJPEG(writeJPEG(image_arr[i, , , ]), native = TRUE)
  # dim(jpg) is (height, width); reverse it to (width, height).
  width_height <- dim(jpg)[2:1]
  # Empty plot whose axes span the image size in pixels.
  plot(
    1, 1,
    xlim = c(1, width_height[1]), ylim = c(1, width_height[2]),
    xlab = "", ylab = ""
  )
  # Draw the raster so that it fills the whole plot region.
  rasterImage(jpg, 1, 1, width_height[1], width_height[2])
}


calculate correlation
# Standardise every image column: subtract its mean, divide by its
# standard deviation.
scaled_data <- scale(shoes, center = TRUE, scale = TRUE)
# Pairwise correlation matrix of the standardised image columns.
sig <- cor(scaled_data)
get eigencomponents
# Eigen-decomposition of the correlation matrix (values and vectors).
eig <- eigen(sig)
# Print the eigenvalues.
eig[["values"]]
## [1] 11.77794388 1.72081925 0.86806022 0.46371734 0.32298978 0.27624335
## [7] 0.23790552 0.20503228 0.16487012 0.15399147 0.14378933 0.13577975
## [13] 0.12975748 0.11385581 0.10510016 0.10124270 0.07890155
eigenshoes
# Cumulative share of the total variance explained by the leading
# eigenvalues.
cumsum(eig[["values"]]) / sum(eig[["values"]])
## [1] 0.6928202 0.7940449 0.8451073 0.8723847 0.8913841 0.9076338 0.9216282
## [8] 0.9336889 0.9433872 0.9524455 0.9609037 0.9688907 0.9765235 0.9832209
## [15] 0.9894033 0.9953587 1.0000000
# The 80% mark falls between two and three components; two components
# (79.4%) is the closer of the pair, so the first two are kept.
# Diagonal scaling matrix: inverse square roots of the two leading
# eigenvalues, divided by sqrt(n - 1) where n is the number of rows.
var_80 <- diag(eig[["values"]][seq_len(2)]^(-1/2)) /
  sqrt(nrow(scaled_data) - 1)
var_80
## [,1] [,2]
## [1,] 0.0004711401 0.000000000
## [2,] 0.0000000000 0.001232586
# Compute the eigenshoes: project the standardised data onto the first two
# eigenvectors and rescale the result with var_80.
eigenshoes <- scaled_data %*% eig$vectors[, 1:2] %*% var_80
# Show each eigenshoe as an image.
# The plot limits use width and height directly instead of `width_height`,
# which was only a leftover from the last iteration of the earlier plotting
# loop (and undefined if that loop never ran); the values are the same
# because every image was resized to height x width.
for (i in seq_len(ncol(eigenshoes))) {
  plot(1, 1, xlim = c(1, width), ylim = c(1, height), xlab = "", ylab = "")
  # Reshape the column back to (height, width, RGB) before displaying it.
  imageShow(array(eigenshoes[, i], c(height, width, RGB)))
}

