With the attached data file, build and visualize eigenimagery that accounts for 80% of the variability. Provide full R code and discussion.
The following analysis uses these R packages.
library(jpeg)
library(OpenImageR)
library(imager)
library(EBImage)
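jpeg, OpenImageR, and imager are available from CRAN, while EBImage is distributed through Bioconductor. A one-time setup sketch in case any of them are missing:
# Install the CRAN packages, then EBImage from Bioconductor
install.packages(c("jpeg", "OpenImageR", "imager", "BiocManager"))
BiocManager::install("EBImage")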
Read one of the images to record its dimensions for later use.
# Dimensions of a sample image
image <- readJPEG(paste0("jpg/", "RC_2500x1200_2014_us_53446.jpg"))
# Image dimensions
height <- nrow(image)
width <- ncol(image)
# Scaling factor (a plain variable; calls to scale() below still resolve to base::scale)
scale <- 20
Display all 17 images of shoes.
# Images
names <- list.files(path = "jpg/", pattern = "\\.jpg$")
num <- length(names)
par(mfrow=c(5,4))
par(mai=c(.02,.02,.02,.02))
for(x in names)
{
i <- readJPEG(paste("jpg/",x,sep=""),native=TRUE)
plot(0:1,0:1,type="n",ann=FALSE,axes=FALSE)
rasterImage(i,0,0,1,1)
}
All shoe images are downscaled by a factor of 20, shrinking each image from 1200 x 2500 pixels to 60 x 125, so a flattened image holds 60 * 125 * 3 = 22,500 values across the three color channels (confirmed by the dimensions printed below). The smaller images require far less memory and computation in the analysis that follows.
# Resize all images into a 4-D array: image x row x column x channel
im <- array(0, dim = c(num, height/scale, width/scale, 3))
for (i in 1:num) {
  temp <- resizeImage(readJPEG(paste0("jpg/", names[i])),
                      height/scale,
                      width/scale)
  if (i == 1) {
    print(dim(readJPEG(paste0("jpg/", names[i]))))  # original dimensions
    print(dim(temp))                                # resized dimensions
  }
  im[i,,,] <- temp
}
## [1] 1200 2500 3
## [1] 60 125 3
Flatten each image into a single row of the data matrix by concatenating its red, green, and blue channel vectors, then transpose the matrix into a data frame whose columns are images.
# Collect the RGB vectors of all images, one flattened image per row
data <- matrix(0, num, prod(dim(im)[-1]))
for (i in 1:num) {
  r <- as.vector(im[i,,,1])
  g <- as.vector(im[i,,,2])
  b <- as.vector(im[i,,,3])
  data[i,] <- c(r, g, b)
}
# Create image dataframe
images <- as.data.frame(t(data))
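A quick sanity check on the shapes (each row of data should hold exactly one flattened 60 x 125 x 3 image):
# 17 rows (one per image); 60 * 125 * 3 = 22,500 columns of pixel values
stopifnot(nrow(data) == num, ncol(data) == prod(dim(im)[-1]))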
Standardize the images so that every column (one column per image) has mean 0 and standard deviation 1.
# Center and scale each column
scaled <- scale(images, center = TRUE, scale = TRUE)
Compute the correlation matrix of the scaled data. Because every column is standardized, this is also the covariance matrix.
cov_m <- cor(scaled)
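Since the columns were standardized, the two matrices coincide up to floating point, which is easy to confirm:
# For standardized columns, covariance and correlation are the same matrix
all.equal(cov(scaled), cor(scaled))  # TRUE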
Compute the eigenvalues and eigenvectors of the covariance matrix.
e <- eigen(cov_m)
eigenvalues <- e$values
eigenvectors <- e$vectors
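cov_m is symmetric, so its eigenvectors form an orthonormal basis, which can be verified numerically:
# V'V should be the 17 x 17 identity matrix for a symmetric input
max(abs(crossprod(eigenvectors) - diag(num)))  # expect a value near 1e-15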
Compute the cumulative proportion of variance explained by the eigenvalues and find the smallest number of components that reaches the 80% threshold.
cum.var <- cumsum(eigenvalues) / sum(eigenvalues)
cum.var
## [1] 0.6833797 0.7836120 0.8350410 0.8629807 0.8827157 0.8996344 0.9143339
## [8] 0.9269997 0.9375131 0.9474671 0.9565155 0.9650405 0.9734219 0.9805532
## [15] 0.9875810 0.9943436 1.0000000
thres <- min(which(cum.var >= 0.80))
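A scree-style plot makes the cutoff easy to see (optional, base graphics only):
# Cumulative variance explained, with the 80% threshold marked
plot(cum.var, type = "b", xlab = "Component", ylab = "Cumulative proportion of variance")
abline(h = 0.80, lty = 2)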
Compute the eigenimage matrix by multiplying the scaled data matrix by the retained eigenvectors times a diagonal scaling matrix built from the corresponding eigenvalues, which rescales each component to unit norm.
scaling <- diag(eigenvalues[1:thres]^(-1/2)) / sqrt(nrow(scaled) - 1)
eigenimages <- scaled %*% eigenvectors[, 1:thres] %*% scaling
# Show the first eigenimage (the single component explaining the most variance)
imageShow(array(eigenimages[,1], c(height/scale, width/scale, 3)))
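Only the first eigenimage is shown above; the same call in a loop displays all thres retained components (imageShow() draws one image per call):
# Display each retained eigenimage in turn
for (j in 1:thres) {
  imageShow(array(eigenimages[, j], c(height/scale, width/scale, 3)))
}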
As a cross-check, calculate the principal components directly with princomp().
# Principal components
im_copy <- im
dim(im_copy) <- c(num, height * width * 3 / scale^2)
pc <- princomp(t(as.matrix(im_copy)), scores = TRUE, cor = TRUE)
pc2 <- t(pc$scores)
dim(pc2) <- c(num, height/scale, width/scale, 3)
par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for(i in 1:num)
{
x <- readJPEG(writeJPEG(pc2[i,,,], bg="white"))
plot(0:1,0:1,type="n",ann=FALSE,axes=FALSE)
rasterImage(x,0,0,1,1)
}
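Note that writeJPEG() expects pixel values in [0, 1], while raw principal-component scores are centered near zero and typically fall outside that range, so the loop above may clip or fail on some inputs. A minimal rescaling helper (the name normalize01 is our own):
# Linearly rescale an array into [0, 1] before handing it to writeJPEG()
normalize01 <- function(a) (a - min(a)) / (max(a) - min(a))
# e.g. inside the display loop:
#   x <- readJPEG(writeJPEG(normalize01(pc2[i,,,]), bg = "white"))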
v <- round(pc$sdev[1:num]^2 / sum(pc$sdev^2), 3)
cumsum(v)
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 Comp.10
## 0.683 0.783 0.834 0.862 0.882 0.899 0.914 0.927 0.938 0.948
## Comp.11 Comp.12 Comp.13 Comp.14 Comp.15 Comp.16 Comp.17
## 0.957 0.966 0.974 0.981 0.988 0.995 1.001
The cumulative proportions agree with the eigendecomposition above; the final value of 1.001 rather than 1.000 is a rounding artifact, since each proportion is rounded to three decimals before summing. Finally, project the scaled data onto the eigenimages to build the reduced dataset, in which each of the 17 images is described by thres coordinates.
d <- t(t(eigenimages) %*% scaled)
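With only thres coordinates per image, an approximate reconstruction is a single matrix product: because the eigenimage columns are orthonormal, scaled is approximately eigenimages %*% t(d). A sketch that rebuilds and displays the first image, reusing the normalize01 helper defined earlier (our own name):
# Back-project the thres coordinates of image 1 into pixel space;
# the result is on the standardized scale, so rescale to [0, 1] for display
recon <- eigenimages %*% d[1, ]
imageShow(normalize01(array(recon, c(height/scale, width/scale, 3))))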