Assignment 4
The shoe images are first loaded and resized to 120 x 250 pixels (height x width).
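The code below assumes three packages are already loaded: jpeg (readJPEG(), writeJPEG()), EBImage (resize(), gblur()) and RSpectra (eigs()). The library calls do not appear in the output above, so the following setup is an assumption rather than the original preamble.
library(jpeg)     # readJPEG(), writeJPEG()
library(EBImage)  # resize(), gblur()
library(RSpectra) # eigs() for a partial eigendecomposition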
files <- list.files(pattern = "\\.jpg$")  # all .jpg files in the working directory
height = 120
width = 250
fhi = matrix(1, nrow = 3, ncol = 3)  # 3x3 Laplacian high-pass kernel
fhi[2, 2] = -8
plot_jpeg = function(path, add = FALSE) {
  jpg = readJPEG(path, native = TRUE)  # read the file
  res = dim(jpg)[2:1]                  # get the resolution, [x, y]
  if (!add)                            # initialize an empty plot area if add==FALSE
    plot(1, 1, xlim = c(1, res[1]), ylim = c(1, res[2]), asp = 1, type = 'n',
         xaxs = 'i', yaxs = 'i', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '', bty = 'n')
  rasterImage(jpg, 1, 1, res[1], res[2])
}
im = array(0, dim = c(length(files), height, width, 3))
for (i in 1:length(files)) {
  temp = EBImage::resize(readJPEG(files[i]), height, width)
  im[i, , , ] = array(temp, dim = c(1, height, width, 3))
}
flat = matrix(0, length(files), height * width * 3)  # one flattened image per row
for (i in 1:length(files)) {
  r = as.vector(im[i, , , 1]); g = as.vector(im[i, , , 2]); b = as.vector(im[i, , , 3])
  flat[i, ] = c(r, g, b)
}
shoes=as.data.frame(t(flat))
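As a side note, the flattening loop above can be collapsed into a single vectorized call; a minimal sketch using only base R, which produces the same row ordering because as.vector() walks the red, green and blue planes in the same column-major order:
flat = t(apply(im, 1, as.vector))  # one flattened image of length height*width*3 per row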
par(mfrow=c(3,3))
par(mai=c(.3,.3,.3,.3))
for (i in 1:length(files)){
plot_jpeg(writeJPEG(im[i,,,]))
}
scaled=scale(shoes, center = TRUE, scale = TRUE)
Sigma_=cor(scaled)
myeigen=eigs(Sigma_,5,which="LM")
cumsum(myeigen$values) / sum(eigen(Sigma_)$values)
## [1] 0.6916309 0.7911549 0.8442585 0.8714110 0.8903922
Even using only 5 eigenvalues, we already account for well above 80% of the variability.
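The quantity reported here is the cumulative proportion of variance explained by the leading eigenvalues of the correlation matrix, $\sum_{i=1}^{k}\lambda_i \big/ \sum_{i=1}^{n}\lambda_i$ for $k = 1,\dots,5$, where $\lambda_1 \ge \dots \ge \lambda_n$ are the eigenvalues of Sigma_ and $n$ is the number of images.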
scaling = diag(myeigen$values[1:5]^(-1/2)) / (sqrt(nrow(scaled) - 1))  # scale each component by 1/sqrt(eigenvalue)
eigenshoes = scaled %*% myeigen$vectors[, 1:5] %*% scaling             # one eigenshoe image per column
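The eigenshoes matrix holds one component image per column, but it is not displayed above. A minimal sketch for viewing the five eigenshoes, assuming the plot_jpeg() helper defined earlier and rescaling each component into the [0, 1] range required by writeJPEG():
par(mfrow = c(1, 5))
par(mai = c(.1, .1, .1, .1))
for (k in 1:5) {
  es = array(eigenshoes[, k], dim = c(height, width, 3))  # reshape the column back to an image
  es = (es - min(es)) / (max(es) - min(es))               # rescale to [0, 1] for writeJPEG()
  plot_jpeg(writeJPEG(es))
}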
newdata=im
dim(newdata)=c(length(files),height*width*3)
mypca=princomp(t(as.matrix(newdata)), scores=TRUE, cor=TRUE)
pcaScores=t(mypca$scores)
dim(pcaScores)=c(length(files),height,width,3)
par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:length(files)){
plot_jpeg(writeJPEG(pcaScores[i,,,], bg="white"))
}
Viewing these images, there is quite a bit of fine detail that the decomposition struggles to capture.
Blurring the images before the analysis should smooth away that detail and may refine the result.
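As a quick visual check of what the smoothing does, a sketch (assuming the first file is representative; sigma and radius match the values used below) that shows one shoe before and after blurring:
par(mfrow = c(1, 2))
par(mai = c(.1, .1, .1, .1))
orig = array(EBImage::resize(readJPEG(files[1]), height, width), dim = c(height, width, 3))
blurred = array(EBImage::gblur(orig, sigma = 8, radius = 5), dim = c(height, width, 3))
plot_jpeg(writeJPEG(orig))
plot_jpeg(writeJPEG(blurred))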
for (i in 1:length(files)) {
  temp = EBImage::gblur(EBImage::resize(readJPEG(files[i]), height, width), sigma = 8, radius = 5)
  im[i, , , ] = array(temp, dim = c(1, height, width, 3))
}
flat = matrix(0, length(files), height * width * 3)  # one flattened image per row
for (i in 1:length(files)) {
  r = as.vector(im[i, , , 1]); g = as.vector(im[i, , , 2]); b = as.vector(im[i, , , 3])
  flat[i, ] = c(r, g, b)
}
shoes=as.data.frame(t(flat))
par(mfrow=c(3,3))
par(mai=c(.3,.3,.3,.3))
for (i in 1:length(files)){
plot_jpeg(writeJPEG(im[i,,,]))
}
scaled=scale(shoes, center = TRUE, scale = TRUE)
Sigma_=cor(scaled)
myeigen=eigs(Sigma_,5,which="LM")
cumsum(myeigen$values) / sum(eigen(Sigma_)$values)
## [1] 0.7705036 0.8742939 0.9197217 0.9385080 0.9517913
Smoothing away the small details has, as expected, increased the proportion of variability accounted for. These images seem more reflective of the actual differences between the shoes.
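To quantify the gain per component, the two sets of cumulative proportions reported above can be compared directly (the values are simply copied from the two outputs):
cum_sharp = c(0.6916309, 0.7911549, 0.8442585, 0.8714110, 0.8903922)
cum_blur  = c(0.7705036, 0.8742939, 0.9197217, 0.9385080, 0.9517913)
round(cum_blur - cum_sharp, 3)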
newdata = im
dim(newdata) = c(length(files), height * width * 3)
mypca = princomp(t(as.matrix(newdata)), scores = TRUE, cor = TRUE)  # refit the PCA on the blurred images
pcaScores = t(mypca$scores)
dim(pcaScores) = c(length(files), height, width, 3)
par(mfrow=c(5,5))
par(mai=c(.001,.001,.001,.001))
for (i in 1:length(files)){
plot_jpeg(writeJPEG(pcaScores[i,,,], bg="white"))
}