- title:Assignment 4

Assignment 4

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

The shoes are first loaded and resized to 120x250.

files <- list.files(pattern = "\\jpg$")
height=120; 
width=250;
fhi = matrix(1, nrow = 3, ncol = 3)
fhi[2, 2] = -8
plot_jpeg = function(path, add=FALSE)
{ jpg = readJPEG(path, native=T) # read the file
  res = dim(jpg)[2:1] # get the resolution, [x, y]
  if (!add) # initialize an empty plot area if add==FALSE
    plot(1,1,xlim=c(1,res[1]),ylim=c(1,res[2]),asp=1,type='n',xaxs='i',yaxs='i',xaxt='n',yaxt='n',xlab='',ylab='',bty='n')
  rasterImage(jpg,1,1,res[1],res[2])
}
im=array(rep(0,length(files)*height*width*3), dim=c(length(files), height, width,3))

for (i in 1:length(files)){

  temp= EBImage::resize(readJPEG(files[i]),120, 250)
  im[i,,,]=array(temp,dim=c(1, 120, 250,3))
}
flat=matrix(0, length(files), prod(dim(im)))
for (i in 1:length(files)){
  r=as.vector(im[i,,,1]); g=as.vector(im[i,,,2]);b=as.vector(im[i,,,3])
  flat[i,] <- t(c(r, g, b))
}
shoes=as.data.frame(t(flat))
par(mfrow=c(3,3))
par(mai=c(.3,.3,.3,.3))
for (i in 1:length(files)){ 
plot_jpeg(writeJPEG(im[i,,,]))
}

scaled=scale(shoes, center = TRUE, scale = TRUE)
Sigma_=cor(scaled)
myeigen=eigs(Sigma_,5,which="LM")
cumsum(myeigen$values) / sum(eigen(Sigma_)$values)
## [1] 0.6916309 0.7911549 0.8442585 0.8714110 0.8903922

We see that even only using 5 EigenValues we are already well above 80% of the variability.

scaling=diag(myeigen$values[1:5]^(-1/2)) / (sqrt(nrow(scaled)-1))
eigenshoes=scaled%*%myeigen$vectors[,1:5]%*%scaling
newdata=im
dim(newdata)=c(length(files),height*width*3)
mypca=princomp(t(as.matrix(newdata)), scores=TRUE, cor=TRUE)
pcaScores=t(mypca$scores)
dim(pcaScores)=c(length(files),height,width,3)
par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:length(files)){
plot_jpeg(writeJPEG(pcaScores[i,,,], bg="white"))  
}

In viewing the images we see that there is quite a bit of fine detail that is tripping it up.

Blurring the images might allow a certain refinement.

for (i in 1:length(files)){

  temp= EBImage::gblur(EBImage::resize(readJPEG(files[i]),120, 250),sigma = 8, radius=5)
  im[i,,,]=array(temp,dim=c(1, 120, 250,3))
}
flat=matrix(0, length(files), prod(dim(im)))
for (i in 1:length(files)){
  r=as.vector(im[i,,,1]); g=as.vector(im[i,,,2]);b=as.vector(im[i,,,3])
  flat[i,] <- t(c(r, g, b))
}
shoes=as.data.frame(t(flat))
par(mfrow=c(3,3))
par(mai=c(.3,.3,.3,.3))
for (i in 1:length(files)){ 
plot_jpeg(writeJPEG(im[i,,,]))
}

scaled=scale(shoes, center = TRUE, scale = TRUE)
Sigma_=cor(scaled)
myeigen=eigs(Sigma_,5,which="LM")
cumsum(myeigen$values) / sum(eigen(Sigma_)$values)
## [1] 0.7705036 0.8742939 0.9197217 0.9385080 0.9517913

We see that effacing the small details has of course improved the variability accounted for. These images seem a more reflective of the actual differences.

pcaScores=t(mypca$scores)
dim(pcaScores)=c(length(files),height,width,3)
par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:length(files)){
plot_jpeg(writeJPEG(pcaScores[i,,,], bg="white"))  
}