HW4

With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability.

Going to start with one shoe

library(jpeg)
library(EBImage)

Plot function

# Draw a JPEG image on the current graphics device.
#
# path: handed straight to jpeg::readJPEG() as the image source.
# add:  when FALSE (the default) a new, empty plot sized to the image is
#       opened first; when TRUE the image is drawn onto the existing plot.
plot_jpeg <- function(path, add = FALSE) {
  # Call readJPEG through its namespace: unlike require(), this stops with an
  # error if the jpeg package is missing instead of returning FALSE and
  # carrying on to a confusing failure further down.
  jpg <- jpeg::readJPEG(path, native = TRUE) # read the file
  res <- dim(jpg)[2:1] # resolution as c(x, y): dim 2 is x, dim 1 is y
  if (!add) {
    # initialize an empty plot area whose limits match the image size
    plot(1, 1,
         xlim = c(1, res[1]), ylim = c(1, res[2]),
         asp = 1, # aspect ratio
         type = 'n', # don't plot anything
         xaxs = 'i', yaxs = 'i', # no padding of the axis ranges
         xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', # no axes or labels
         bty = 'n') # no box around graph
  }
  rasterImage(jpg, 1, 1, res[1], res[2]) # image, xleft, ybottom, xright, ytop
}

Load a single shoe

single_shoe_file_path <- 'C:\\Users\\NBMF48\\Desktop\\SPS\\Data605\\HomeWork\\HW4\\jpg\\RC_2500x1200_2014_us_53446.jpg'
# Read with the default native = FALSE so the result is a numeric
# height x width x channels array. With native = TRUE readJPEG() returns a
# nativeRaster, which has no separate channel dimension, so the later
# resize/reshape into a (1, height, width, 3) array would silently recycle
# values. (Assumes the file is an RGB JPEG, i.e. 3 channels.)
single_shoe_img <- readJPEG(single_shoe_file_path)

readJPEG() has created a raster array of 3 channels corresponding to the intensity of each RGB pixel.

Now let's plot our shoe

# Open an empty plotting region whose limits match the image size. The stray
# trailing comma after the last argument was removed so that no empty argument
# is forwarded to plot().
plot(1, 1, # begin point of image is co-ordinate (1, 1)
     xlim = c(1, ncol(single_shoe_img)), # second dimension spans the x axis
     ylim = c(1, nrow(single_shoe_img)), # first dimension spans the y axis
     asp = 1, # setting aspect ratio
     type = 'n', # don't plot anything
     xaxs = 'i', yaxs = 'i', # prevents expanding axis window
     xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', # no axes or labels
     bty = 'n' # no boundary around plot
)

# plot shoe: image, xleft, ybottom, xright, ytop
rasterImage(single_shoe_img, 1, 1, ncol(single_shoe_img), nrow(single_shoe_img))

We are going to now “open” the single_shoe_img by storing the individual pixels in an array. Doing this will put all the attributes of single_shoe_img into one variable. We are also going to define a reduction scale

# Reduction factor applied to both image dimensions.
scale <- 20

# Size of the loaded image: first dimension is the height, second the width.
height <- nrow(single_shoe_img)
width <- ncol(single_shoe_img)

# Target size after dividing each dimension by the reduction factor.
scaled_height <- height / scale
scaled_width <- width / scale

# Shrink the image to the target size (resize() presumably comes from EBImage,
# loaded at the top of the file).
temp <- resize(single_shoe_img, scaled_height, scaled_width)

# "Opened" image: the resized pixels stored in a 4D array with a leading
# dimension of length 1 and a trailing dimension of length 3.
opened_dims <- c(1, scaled_height, scaled_width, 3)
single_shoe_image_opened <- array(temp, dim = opened_dims)

To perform Principal Component Analysis (PCA) on the single shoe image, we have to turn the 4D data (single_shoe_image_opened) into a single row of values. Basically, we flatten the data.

# Number of values in the opened image: height x width x 3.
flattened_length <- scaled_height * scaled_width * 3

# Work on a copy so the 4D "opened" array stays available.
single_shoe_image_flatten <- single_shoe_image_opened

# Flatten by reassigning the dimensions: one row holding every value.
dim(single_shoe_image_flatten) <- c(1, flattened_length)

Perform PCA

# Transpose the one-row matrix into a single column before handing it to
# princomp(), then run the PCA on the correlation matrix and keep the scores.
single_shoe_pixels <- t(as.matrix(single_shoe_image_flatten))
single_shoe_image_pca <- princomp(single_shoe_pixels, cor = TRUE, scores = TRUE)

# sanity check: the proportions of variance should add up to 1
single_shoe_component_variance <- single_shoe_image_pca$sdev^2
sum(single_shoe_component_variance / sum(single_shoe_component_variance))
## [1] 1

From the PCA, plot the eigenshoe

# Transpose the PCA scores so they can be reshaped like the flattened image.
single_shoe_image_pca_transposed <- t(single_shoe_image_pca$scores)

# restore the same 4D layout the opened image had before flattening
dim(single_shoe_image_pca_transposed) <- c(1, scaled_height, scaled_width, 3)

# near-zero margins on all four sides of the plot
par(mai = rep(.001, 4))

# Author's note: could not get the eigenimage of one shoe to work; the cause
# has not been identified yet.
# Encode the reshaped scores as JPEG data (indexing with [, , , ] drops the
# leading length-1 dimension), then draw the result with plot_jpeg().
single_eigenshoe_jpeg <- writeJPEG(single_shoe_image_pca_transposed[, , , ],
                                   quality = 1, bg = 'white')
plot_jpeg(single_eigenshoe_jpeg)

Working on All Shoes

Load all shoes

file_path <- 'C:\\Users\\NBMF48\\Desktop\\SPS\\Data605\\HomeWork\\HW4\\jpg'

# List the directory once and derive the count from that listing. The pattern
# is anchored with `$` so only names that END in ".jpg" match (the unanchored
# form would also pick up names such as "shoe.jpg.bak").
shoes <- list.files(file_path, pattern = '\\.jpg$')
number_of_images <- length(shoes)

# constants: height and width of the source images (in pixels) and the factor
# by which both are reduced
height <- 1200
width <- 2500
scale <- 20

# 5 x 5 grid of panels with small margins around each image
par(mfrow = c(5, 5))
par(mai = c(0.05, 0.05, 0.05, 0.05))

# seq_len() yields an empty sequence when no images were found, whereas
# 1:number_of_images would iterate over c(1, 0).
for (i in seq_len(number_of_images)) {
  plot_jpeg(paste0(file_path, '\\', shoes[i]))
}

Open up image by turning the image data into a 4D array

# Preallocate a zero-filled 4D array with dimensions
# (number of images) x (height / scale) x (width / scale) x 3.
images <- array(0, dim = c(length(shoes), height / scale, width / scale, 3))

for (i in seq_len(number_of_images)) {
  temp_file_path <- paste0(file_path, '\\', shoes[i])
  # Resize to (height / scale) x (width / scale). The second target size used
  # to be height / scale as well, which produced a square image that was then
  # silently recycled to fill the wider slot below.
  temp <- resize(readJPEG(temp_file_path), height / scale, width / scale)
  images[i, , , ] <- array(temp, dim = c(1, height / scale, width / scale, 3))
}

Store the RGB intensities of each image as a vector

# One row per image, one column per stored value of that image. The column
# count is the product of every dimension EXCEPT the first (the image index);
# using prod(dim(images)) made each row as long as the whole data set, so the
# values of every image were recycled to fill it.
values_per_image <- prod(dim(images)[-1])
flat <- matrix(0, number_of_images, values_per_image)
for (i in seq_len(number_of_images)) {
  # as.vector() on the 3D slice walks the last index slowest, so the result is
  # all of channel 1, then channel 2, then channel 3 -- the same order as
  # concatenating the three channels by hand.
  flat[i, ] <- as.vector(images[i, , , ])
}

# transpose so that each image becomes one column (variable) for the PCA
shoes_vector <- as.data.frame(t(flat))

Performing PCA on all the shoes

# PCA over all shoes, computed on the correlation matrix with scores retained.
shoes_pca <- princomp(shoes_vector, cor = TRUE, scores = TRUE)

# sanity check: the proportions of variance should add up to 1
shoes_component_variance <- shoes_pca$sdev^2
print(sum(shoes_component_variance / sum(shoes_component_variance)))
## [1] 1

Check how many principal components are needed before the cumulative variance exceeds 80%

# Proportion of the total variance carried by each principal component.
shoes_variance <- shoes_pca$sdev^2 / sum(shoes_pca$sdev^2)

# Running total of those proportions; unname() drops the component labels so
# the printed vector shows bare numbers.
cumulative_variance <- cumsum(unname(shoes_variance))

# Index of the first component at which the running total exceeds 80%.
# Computing it directly (rather than detecting the crossing one loop iteration
# later) also reports a result when the threshold is only passed at the very
# last component.
components_needed <- which(cumulative_variance > 0.8)[1]

# Running totals up to and including the component that crosses the threshold.
variance_sum_array <- cumulative_variance[seq_len(components_needed)]
total_sum <- variance_sum_array[components_needed]

print(paste0('Number of shoes needed for a variance of 80%: ', components_needed))
## [1] "Number of shoes needed for a variance of 80%: 3"
variance_sum_array
## [1] 0.6907322 0.7905771 0.8430798

Thus we plot the eigenimages of the first 3 components

# Transpose the scores so that each ROW holds one principal component, laid
# out like one flattened image.
eigen_shoe <- t(shoes_pca$scores)

# Size of one scaled image: (height / scale) x (width / scale) x 3.
eigen_dims <- c(height / scale, width / scale, 3)

# only need to plot the first three components
components_to_plot <- min(3, nrow(eigen_shoe))

if (ncol(eigen_shoe) == prod(eigen_dims)) {
  # Each row has exactly one image's worth of values, so the matrix can be
  # reshaped into (component, height, width, channel).
  dim(eigen_shoe) <- c(nrow(eigen_shoe), eigen_dims)
  par(mfrow = c(1, components_to_plot))
  par(mai = c(0.05, 0.05, 0.05, 0.05))
  for (i in seq_len(components_to_plot)) {
    # writeJPEG() without a target returns the encoded image as a raw vector,
    # which readJPEG() (called inside plot_jpeg) accepts in place of a file
    # name. Per the jpeg documentation, values outside [0, 1] are clipped.
    plot_jpeg(writeJPEG(eigen_shoe[i, , , ], quality = 1, bg = 'white'))
  }
} else {
  # Reshaping would fail, so skip the plot instead of aborting the document.
  warning('Cannot plot eigenimages: each component has ', ncol(eigen_shoe),
          ' values but one scaled image holds ', prod(eigen_dims), '.',
          call. = FALSE)
}

I couldn't get my plot of eigenimages to work. I commented out the code above so I could knit.